Background

Open-Meteo maintains a historical weather API that permits non-commercial use of the historical weather data hosted on its website.

This file builds on _v001, _v002, and _v003 to run exploratory analysis on some historical weather data.

Functions and Libraries

The exploration process uses tidyverse, ranger, several generic custom functions, and several functions specific to Open Meteo processing. First, tidyverse, ranger, and the generic functions are loaded:

library(tidyverse) # tidyverse functionality is included throughout
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ranger) # predict() does not work on ranger objects unless ranger has been called
## Warning: package 'ranger' was built under R version 4.2.3
source("./Generic_Added_Utility_Functions_202105_v001.R") # Basic functions

Next, specific functions written in _v001 are copied:

# Helper function for reading a partial CSV file
partialCSVRead <- function(loc, firstRow=1L, lastRow=+Inf, col_names=TRUE, ...) {
    
    # FUNCTION arguments
    # loc: file location
    # firstRow: first row that is relevant to the partial file read (whether header line or data line)
    # lastRow: last row that is relevant to the partial file read (+Inf means read until last line of file)
    # col_names: the col_names parameter passed to readr::read_csv
    #            TRUE means header=TRUE (get column names from file, read data starting on next line)
    #            FALSE means header=FALSE (auto-generate column names, read data starting on first line)
    #            character vector means use these as column names (read data starting on first line)
    # ...: additional arguments passed to read_csv

    # Read the file and return
    # skip: rows to be skipped are all those prior to firstRow
    # n_max: maximum rows read are lastRow-firstRow, with an additional data row when col_names is not TRUE
    #        (scalar if/else is preferred over ifelse() for a length-1 condition)
    readr::read_csv(loc, 
                    col_names=col_names,
                    skip=firstRow-1, 
                    n_max=lastRow-firstRow+(if(isTRUE(col_names)) 0 else 1), 
                    ...
                    )
    
}


# Get the break points for gaps in a vector
# (e.g., 0, 3, 5:8, 20 has run starts 0, 3, 5, 20 and run ends 0, 3, 8, 20)
vecGaps <- function(x, addElements=c(), sortUnique=TRUE) {
    
    # FUNCTION arguments
    # x: numeric vector of positions (typically blank-line numbers from a file)
    # addElements: extra positions prepended before processing (e.g., 0 as a sentinel)
    # sortUnique: boolean, sort and de-duplicate x first? (needed unless x is
    #             already sorted and unique)
    
    if(length(addElements)>0) x <- c(addElements, x)
    if(isTRUE(sortUnique)) x <- unique(sort(x))
    # A run starts where there is no predecessor or a jump greater than 1, and
    # ends where there is no successor or a jump greater than 1.
    # dplyr:: is spelled out so behavior does not depend on dplyr masking
    # stats::lag (which has entirely different semantics).
    # "starts" carries a trailing +Inf sentinel so callers can index starts[i+1].
    list("starts"=c(x[is.na(dplyr::lag(x)) | x-dplyr::lag(x)>1], +Inf), 
         "ends"=x[is.na(dplyr::lead(x)) | dplyr::lead(x)-x>1]
         )
    
}


# Find the break points in a single file
# Blank lines (zero characters) delimit the sub-tables of a flat file; their
# line numbers are converted to run start/end break points via vecGaps()
flatFileGaps <- function(loc) {

    # nzchar() is the idiomatic base-R test for non-empty strings; negating it
    # flags blank lines without the stringr dependency. addElements=0 provides
    # the leading sentinel so the first section starts at line 1.
    vecGaps(which(!nzchar(readLines(loc))), addElements=0)
    
}


# Read all relevant data as CSV with header
# Each blank-line-delimited section of the file becomes one element of the
# returned list of tibbles
readMultiCSV <- function(loc, col_names=TRUE, ...) {

    # Locate the blank-line break points of the file
    gaps <- flatFileGaps(loc)
    
    # A section's data begins one line after a run of blanks ends and stops one
    # line before the next run starts ("starts" carries a +Inf sentinel, so the
    # shifted indexing below is always in bounds)
    sectionFirst <- gaps$ends + 1
    sectionLast <- gaps$starts[-1] - 1
    
    # Read each (first, last) pair as its own partial CSV
    Map(function(f, l) partialCSVRead(loc, 
                                      firstRow=f, 
                                      lastRow=l, 
                                      col_names=col_names, 
                                      ...
                                      ), 
        sectionFirst, 
        sectionLast
        )
    
}


# Create URL with specified parameters for downloading data from Open Meteo
openMeteoURLCreate <- function(mainURL="https://archive-api.open-meteo.com/v1/archive", 
                               lat=45, 
                               lon=-90, 
                               startDate=paste(year(Sys.Date())-1, "01", "01", sep="-"), 
                               endDate=paste(year(Sys.Date())-1, "12", "31", sep="-"), 
                               hourlyMetrics=NULL, 
                               dailyMetrics=NULL,
                               tz="GMT", 
                               ...
                               ) {
    
    # FUNCTION arguments
    # mainURL: base endpoint of the Open-Meteo historical archive API
    # lat, lon: location coordinates inserted verbatim into the query string
    # startDate, endDate: ISO dates (defaults cover the full previous calendar year)
    # hourlyMetrics, dailyMetrics: comma-separated metric names (NULL omits the parameter)
    # tz: timezone name; "/" is percent-encoded for the query string
    # ...: additional strings appended verbatim to the end of the URL
    
    # Create formatted string
    fString <- paste0(mainURL, 
                      "?latitude=", 
                      lat, 
                      "&longitude=", 
                      lon, 
                      "&start_date=", 
                      startDate, 
                      "&end_date=", 
                      endDate
                      )
    if(!is.null(hourlyMetrics)) fString <- paste0(fString, "&hourly=", hourlyMetrics)
    if(!is.null(dailyMetrics)) fString <- paste0(fString, "&daily=", dailyMetrics)
    
    # Return the formatted string
    # BUG FIX: gsub() replaces ALL slashes; str_replace() only replaced the
    # first, which broke multi-part zones such as "America/Argentina/Buenos_Aires"
    paste0(fString, "&timezone=", gsub("/", "%2F", tz, fixed=TRUE), ...)
    
}


# Helper function to simplify entry of parameters for Open Meteo download requests
helperOpenMeteoURL <- function(cityName=NULL,
                               lat=NULL,
                               lon=NULL,
                               hourlyMetrics=NULL,
                               hourlyIndices=NULL,
                               hourlyDesc=tblMetricsHourly,
                               dailyMetrics=NULL,
                               dailyIndices=NULL,
                               dailyDesc=tblMetricsDaily,
                               startDate=NULL, 
                               endDate=NULL, 
                               tz=NULL,
                               ...
                               ) {
    
    # FUNCTION ARGUMENTS:
    # cityName: city used to look up lat/lon in maps::us.cities (only when lat/lon are NULL)
    # lat, lon: coordinates passed to openMeteoURLCreate()
    # hourlyMetrics / dailyMetrics: comma-separated metric strings (take precedence over indices)
    # hourlyIndices / dailyIndices: row indices into hourlyDesc / dailyDesc for building metric strings
    # hourlyDesc / dailyDesc: metric lookup tables with a "metric" column
    #                         (defaults are the globals tblMetricsHourly / tblMetricsDaily,
    #                         which must exist in the calling environment)
    # startDate, endDate, tz: NULL means fall back to the openMeteoURLCreate() defaults
    # ...: passed through to openMeteoURLCreate()
    
    # Convert city to lat/lon if lat/lon are NULL
    # (|| and && are the correct short-circuiting operators for scalar if() conditions)
    if(is.null(lat) || is.null(lon)) {
        if(is.null(cityName)) stop("\nMust provide lat/lon or city name available in maps::us.cities\n")
        cityData <- maps::us.cities %>% tibble::as_tibble() %>% filter(name==cityName)
        if(nrow(cityData)!=1) stop("\nMust provide city name that maps uniquely to maps::us.cities$name\n")
        lat <- cityData$lat[1]
        lon <- cityData$long[1]
    }
    
    # Get hourly metrics by index if relevant
    if(is.null(hourlyMetrics) && !is.null(hourlyIndices)) {
        hourlyMetrics <- hourlyDesc %>% slice(hourlyIndices) %>% pull(metric)
        hourlyMetrics <- paste0(hourlyMetrics, collapse=",")
        cat("\nHourly metrics created from indices:", hourlyMetrics, "\n\n")
    }
    
    # Get daily metrics by index if relevant
    if(is.null(dailyMetrics) && !is.null(dailyIndices)) {
        dailyMetrics <- dailyDesc %>% slice(dailyIndices) %>% pull(metric)
        dailyMetrics <- paste0(dailyMetrics, collapse=",")
        cat("\nDaily metrics created from indices:", dailyMetrics, "\n\n")
    }
    
    # Use default values from openMeteoURLCreate() for startDate, endDate, and tz if passed as NULL
    # (formals() returns the unevaluated default expressions; eval() forces them here)
    if(is.null(startDate)) startDate <- eval(formals(openMeteoURLCreate)$startDate)
    if(is.null(endDate)) endDate <- eval(formals(openMeteoURLCreate)$endDate)
    if(is.null(tz)) tz <- eval(formals(openMeteoURLCreate)$tz)
    
    # Create and return URL
    openMeteoURLCreate(lat=lat,
                       lon=lon, 
                       startDate=startDate, 
                       endDate=endDate, 
                       hourlyMetrics=hourlyMetrics, 
                       dailyMetrics=dailyMetrics, 
                       tz=tz,
                       ...
                       )
    
}


# Read JSON data returned from Open Meteo
readOpenMeteoJSON <- function(js, mapDaily=tblMetricsDaily, mapHourly=tblMetricsHourly) {
    
    # FUNCTION arguments: 
    # js: JSON returned by download from Open-Meteo (path/URL/string accepted by jsonlite::read_json)
    # mapDaily: mapping file for daily metrics (defaults to global tblMetricsDaily)
    # mapHourly: mapping file for hourly metrics (defaults to global tblMetricsHourly)
    
    # Get the object and names
    jsObj <- jsonlite::read_json(js, simplifyVector = TRUE)
    nms <- jsObj %>% names()
    cat("\nObjects in JSON include:", paste(nms, collapse=", "), "\n\n")
    
    # Set default objects as NULL
    tblDaily <- NULL
    tblHourly <- NULL
    tblUnitsDaily <- NULL
    tblUnitsHourly <- NULL
    
    # Get daily and hourly as tibble if relevant (date/hour parsing handled by
    # the omProcessDaily / omProcessHourly helpers defined in this file)
    if("daily" %in% nms) tblDaily <- jsObj$daily %>% tibble::as_tibble() %>% omProcessDaily()
    if("hourly" %in% nms) tblHourly <- jsObj$hourly %>% tibble::as_tibble() %>% omProcessHourly()
    
    # Helper function for unit conversions
    # When desc is NULL it is derived from the caller's expression for x:
    # deparse() turns e.g. jsObj$daily_units into "jsObj$daily_units", and the
    # regex strips everything through the last "$", leaving "daily_units"
    helperMetricUnit <- function(x, mapper, desc=NULL) {
        if(is.null(desc)) 
            desc <- as.list(match.call())$x %>% 
                deparse() %>% 
                stringr::str_replace_all(pattern=".*\\$", replacement="")
        x %>% 
            tibble::as_tibble() %>% 
            pivot_longer(cols=everything()) %>% 
            left_join(mapper, by=c("name"="metric")) %>% 
            # Replace the degree sign (U+00B0) with ASCII "deg " for plain output
            mutate(value=stringr::str_replace(value, "\u00b0", "deg ")) %>% 
            mutate(metricType=desc) %>% 
            select(metricType, everything())
    }
    
    # Get the unit descriptions and combine whichever of the two exist
    # (&& is the correct scalar operator for these length-1 conditions)
    if("daily_units" %in% nms) tblUnitsDaily <- helperMetricUnit(jsObj$daily_units, mapDaily)
    if("hourly_units" %in% nms) tblUnitsHourly <- helperMetricUnit(jsObj$hourly_units, mapHourly)
    if(is.null(tblUnitsDaily) && !is.null(tblUnitsHourly)) tblUnits <- tblUnitsHourly
    else if(!is.null(tblUnitsDaily) && is.null(tblUnitsHourly)) tblUnits <- tblUnitsDaily
    else if(!is.null(tblUnitsDaily) && !is.null(tblUnitsHourly)) 
        tblUnits <- bind_rows(tblUnitsHourly, tblUnitsDaily)
    else tblUnits <- NULL
    
    # Put everything else together (remaining scalar metadata becomes a one-row tibble;
    # assumes those fields are scalars - confirm against the API response)
    tblDescription <- jsObj[setdiff(nms, c("hourly", "hourly_units", "daily", "daily_units"))] %>%
        tibble::as_tibble()
    
    # Return the list objects
    list(tblDaily=tblDaily, tblHourly=tblHourly, tblUnits=tblUnits, tblDescription=tblDescription)
    
}


# Return Open meteo metadata in prettified format
prettyOpenMeteoMeta <- function(df, extr="tblDescription") {
    # df: tibble/data frame of metadata, or a list holding one under name extr
    #     (e.g., the list returned by readOpenMeteoJSON)
    # extr: element to extract when df is a list
    if(inherits(df, "list")) df <- df[[extr]]
    for(name in names(df)) {
        # [[ rather than dplyr::pull(name): pull() would silently grab a column
        # literally called "name" instead of the loop variable's value
        cat("\n", name, ": ", df[[name]], sep="")
    }
    cat("\n\n")
}


# Process Open Meteo daily data
omProcessDaily <- function(tbl, extr="tblDaily") {
    # tbl: tibble of daily data with an ISO "time" column, or a list holding
    #      one under name extr (e.g., the list returned by readOpenMeteoJSON)
    # extr: element to extract when tbl is a list
    # inherits() is the idiomatic class test (equivalent to "list" %in% class(tbl))
    if(inherits(tbl, "list")) tbl <- tbl[[extr]]
    # Parse "time" into a Date and surface it as the first column
    tbl %>% mutate(date=lubridate::ymd(time)) %>% select(date, everything())
}


# Process Open meteo hourly data
omProcessHourly <- function(tbl, extr="tblHourly") {
    # tbl: tibble of hourly data with an ISO "time" column, or a list holding
    #      one under name extr (e.g., the list returned by readOpenMeteoJSON)
    # extr: element to extract when tbl is a list
    # inherits() is the idiomatic class test (equivalent to "list" %in% class(tbl))
    if(inherits(tbl, "list")) tbl <- tbl[[extr]]
    # Keep the raw timestamp string, parse it to POSIXct, and derive date/hour;
    # surface the time columns first
    tbl %>% 
        mutate(origTime=time, 
               time=lubridate::ymd_hm(time), 
               date=lubridate::date(time), 
               hour=lubridate::hour(time)
               ) %>% 
        select(time, date, hour, everything())
}


# Simple predictive model for categorical variable
simpleOneVarPredict <- function(df, 
                                tgt, 
                                prd, 
                                dfTest=NULL,
                                nPrint=30, 
                                showPlot=TRUE, 
                                returnData=TRUE
                                ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame or tibble with key elements (training data set)
    # tgt: target variable
    # prd: predictor variable
    # dfTest: test dataset for applying predictions
    # nPrint: maximum number of lines of confusion matrix to print
    #         0 means do not print any summary statistics
    # showPlot: boolean, should overlap plot be created and shown?
    # returnData: boolean, should prediction and confusion tables be returned?
    
    # Counts of predictor to target variable; after sorting by descending count,
    # the first target within each predictor bucket becomes the prediction
    dfPred <- df %>%
        group_by(across(all_of(c(prd, tgt)))) %>%
        summarize(n=n(), .groups="drop") %>%
        arrange(across(all_of(prd)), desc(n)) %>%
        group_by(across(all_of(prd))) %>%
        mutate(correct=row_number()==1, predicted=first(get(tgt))) %>%
        ungroup()

    # Confusion matrix and accuracy (pctNaive assumes equally likely classes)
    # NOTE(review): assumes both correct and incorrect cases occur so that both
    # `TRUE` and `FALSE` columns exist after pivot_wider - confirm behavior for
    # a perfect (or perfectly wrong) predictor
    dfConf <- dfPred %>%
        group_by(across(all_of(c(tgt, "correct")))) %>%
        summarize(n=sum(n), .groups="drop") %>%
        pivot_wider(id_cols=tgt, names_from=correct, values_from=n, values_fill=0) %>%
        mutate(n=`TRUE`+`FALSE`, 
               pctCorrect=`TRUE`/n, 
               pctNaive=1/(nrow(.)), 
               lift=pctCorrect/pctNaive-1
               )
    
    # Overall confusion matrix (pctNaive here is the majority-class share)
    dfConfAll <- dfConf %>%
        summarize(nMax=max(n), across(c(`FALSE`, `TRUE`, "n"), sum)) %>%
        mutate(pctCorrect=`TRUE`/n, 
               pctNaive=nMax/n, 
               lift=pctCorrect/pctNaive-1, 
               nBucket=length(unique(dfPred[[prd]]))
               )
    
    # Print confusion matrices
    if(nPrint > 0) {
        cat("\nAccuracy by target subgroup (training data):\n")
        dfConf %>% print(n=nPrint)
        cat("\nOverall Accuracy (training data):\n")
        dfConfAll %>% print(n=nPrint)
    }
    
    # Plot of overlaps (actual vs. predicted tile/count plot)
    if(isTRUE(showPlot)) {
        p1 <- dfPred %>%
            group_by(across(c(all_of(tgt), "predicted", "correct"))) %>%
            summarize(n=sum(n), .groups="drop") %>%
            ggplot(aes(x=get(tgt), y=predicted)) + 
            labs(x="Actual", 
                 y="Predicted", 
                 title=paste0("Training data - Actual vs. predicted ", tgt), 
                 subtitle=paste0("(using ", prd, ")")
                 ) + 
            geom_text(aes(label=n)) + 
            geom_tile(aes(fill=correct), alpha=0.25)
        print(p1)
    }
    
    # Create metrics for test dataset if requested
    if(!is.null(dfTest)) {
        # Get most common predicted category from training data (fallback for
        # predictor levels unseen in training)
        # BUG FIX: sort=TRUE is required; without it slice(1) returns the first
        # category in grouping order, not the most frequent one
        mostPredicted <- count(dfPred, predicted, wt=n, sort=TRUE) %>% slice(1) %>% pull(predicted)
        # Get mapping of metric to prediction
        dfPredict <- dfPred %>% 
            group_by(across(all_of(c(prd, "predicted")))) %>% 
            summarize(n=sum(n), .groups="drop")
        # Create predictions for test data (join key named explicitly)
        dfPredTest <- dfTest %>%
            select(all_of(c(prd, tgt))) %>%
            left_join(select(dfPredict, -n), by=prd) %>%
            replace_na(list(predicted=mostPredicted)) %>%
            group_by(across(all_of(c(prd, tgt, "predicted")))) %>%
            summarize(n=n(), .groups="drop") %>%
            mutate(correct=(get(tgt)==predicted))
        # Create confusion statistics for test data
        dfConfTest <- dfPredTest %>%
            group_by(across(all_of(c(tgt, "correct")))) %>%
            summarize(n=sum(n), .groups="drop") %>%
            pivot_wider(id_cols=tgt, names_from=correct, values_from=n, values_fill=0) %>%
            mutate(n=`TRUE`+`FALSE`, 
                   pctCorrect=`TRUE`/n, 
                   pctNaive=1/(nrow(.)), 
                   lift=pctCorrect/pctNaive-1
                   )
        # Overall confusion matrix for test data
        # BUG FIX: the bucket count must come from dfPredTest (which carries prd);
        # dfConfTest has no prd column, so the old code always yielded 0
        dfConfAllTest <- dfConfTest %>%
            summarize(nMax=max(n), across(c(`FALSE`, `TRUE`, "n"), sum)) %>%
            mutate(pctCorrect=`TRUE`/n, 
                   pctNaive=nMax/n, 
                   lift=pctCorrect/pctNaive-1, 
                   nBucket=length(unique(dfPredTest[[prd]]))
               )
        # Print confusion matrices
        if(nPrint > 0) {
            cat("\nAccuracy by target subgroup (testing data):\n")
            dfConfTest %>% print(n=nPrint)
            cat("\nOverall Accuracy (testing data):\n")
            dfConfAllTest %>% print(n=nPrint)
            }
    } else {
        dfPredTest <- NULL
        dfConfTest <- NULL
        dfConfAllTest <- NULL
        
    }
    
    # Return data if requested
    if(isTRUE(returnData)) list(dfPred=dfPred, 
                                dfConf=dfConf, 
                                dfConfAll=dfConfAll, 
                                dfPredTest=dfPredTest, 
                                dfConfTest=dfConfTest, 
                                dfConfAllTest=dfConfAllTest
                                )
    
}


# Fit a single predictor to a single categorical variable
simpleOneVarFit <- function(df, 
                            tgt, 
                            prd, 
                            rankType="last", 
                            naMethod=TRUE
                            ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame or tibble with key elements (training data set)
    # tgt: target variable
    # prd: predictor variable
    # rankType: method for breaking ties of same n, passed to base::rank as ties.method=
    # naMethod: method for handling NA in ranks, passed to base::rank as na.last=
    
    # Tabulate each predictor-target combination
    dfCounts <- df %>%
        group_by(across(all_of(c(prd, tgt)))) %>%
        summarize(n=n(), .groups="drop")
    
    # Order within each predictor bucket by descending count (target breaks
    # ties), then rank each target inside its bucket (rankN=1 is most frequent)
    dfCounts %>%
        arrange(across(all_of(prd)), desc(n), across(all_of(tgt))) %>%
        group_by(across(all_of(prd))) %>%
        mutate(rankN=n()+1-rank(n, ties.method=rankType, na.last=naMethod)) %>%
        arrange(across(all_of(prd)), rankN) %>%
        ungroup()

}


# Create categorical predictions mapper
simpleOneVarMapper <- function(df, tgt, prd) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame or tibble from simpleOneVarFit()
    # tgt: target variable
    # prd: predictor variable
    
    # Per-predictor prediction: the top-ranked (first) target within each bucket
    mapPrediction <- df %>%
        group_by(across(all_of(prd))) %>%
        slice(1) %>%
        select(all_of(c(prd, tgt))) %>%
        ungroup()
    
    # Overall target frequencies, most common first (fallback for unseen buckets)
    mapFallback <- df %>% count(across(all_of(tgt)), wt=n, sort=TRUE)
    
    # Element names are part of the interface expected by downstream functions
    list(dfPredictor=mapPrediction, dfCommon=mapFallback)
    
}


# Map the categorical predictions to unseen data
simpleOneVarApplyMapper <- function(df, 
                                    tgt,
                                    prd, 
                                    mapper, 
                                    mapperDF="dfPredictor", 
                                    mapperDefault="dfCommon",
                                    prdName="predicted"
                                    ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame containing prd for predicting tgt
    # tgt: target variable in df
    # prd: predictor variable in df
    # mapper: mapping list from simpleOneVarMapper()
    # mapperDF: element that can be used to merge mappings
    # mapperDefault: element that can be used for NA resulting from merging mapperDF
    # prdName: name for the prediction variable
    
    # Extract the mapper (with tgt renamed to prdName) and the default value
    # (the most common target overall) for predictor levels absent from the mapper
    vecRename <- c(prdName) %>% purrr::set_names(tgt)
    dfMap <- mapper[[mapperDF]] %>% select(all_of(c(prd, tgt))) %>% colRenamer(vecRename=vecRename)
    chrDefault <- mapper[[mapperDefault]] %>% slice(1) %>% pull(tgt)
    
    # Merge mappings to df
    # BUG FIX: the replace_na() target must be the dynamic prdName, not the
    # hard-coded literal "predicted" (which was only right for the default)
    df %>%
        left_join(dfMap, by=prd) %>%
        replace_na(purrr::set_names(list(chrDefault), prdName))
    
}


# Create confusion matrix data for categorical predictions
simpleOneVarConfusionData <- function(df, 
                                      tgtOrig,
                                      tgtPred, 
                                      otherVars=c(),
                                      weightBy="n"
                                      ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame from simpleOneVarApplyMapper()
    # tgtOrig: original target variable name in df
    # tgtPred: predicted target variable name in df
    # otherVars: other variables to be kept (will be grouping variables)
    # weightBy: weighting variable for counts in df (NULL means count each row of df as 1)
    
    # Confusion matrix data creation
    # .data[[...]] (rather than get()) guarantees the string refers to a column
    # of df; get() would silently fall back to the calling environment if the
    # column were missing
    df %>%
        group_by(across(all_of(c(tgtOrig, tgtPred, otherVars)))) %>%
        summarize(n=if(!is.null(weightBy)) sum(.data[[weightBy]]) else n(), .groups="drop") %>%
        mutate(correct=.data[[tgtOrig]]==.data[[tgtPred]])
    
}


# Print and plot confusion matrix for categorical predictions
simpleOneVarConfusionReport <- function(df, 
                                        tgtOrig,
                                        tgtPred, 
                                        otherVars=c(), 
                                        printConf=TRUE,
                                        printConfOrig=printConf, 
                                        printConfPred=printConf,
                                        printConfOverall=printConf, 
                                        plotConf=TRUE, 
                                        plotDesc="",
                                        nBucket=NA, 
                                        predictorVarName="", 
                                        returnData=FALSE
                                        ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame from simpleOneVarConfusionData()
    # tgtOrig: original target variable name in df
    # tgtPred: predicted target variable name in df
    # otherVars: other variables to be kept (will be grouping variables) - NOT IMPLEMENTED
    # printConf: boolean, should confusion matrix data be printed? Applies to all three
    # printConfOrig: boolean, should confusion data be printed based on original target variable?
    # printConfPred: boolean, should confusion data be printed based on predicted target variable?
    # printConfOverall: boolean, should overall confusion data be printed?
    # plotConf: boolean, should confusion overlap data be plotted?
    # plotDesc: descriptive label to be included in front of plot title
    # nBucket: number of buckets used for prediction (pass from previous data)
    # predictorVarName: variable name to be included in chart description
    # returnData: boolean, should the confusion matrices be returned?
    
    # The gating conditions below use || (scalar, short-circuiting) rather than
    # the elementwise |, per standard R practice for if() conditions
    
    # Confusion data based on original target variable
    if(isTRUE(printConfOrig) || isTRUE(returnData)) {
        dfConfOrig <- df %>%
            group_by(across(all_of(c(tgtOrig)))) %>%
            summarize(right=sum(n*correct), wrong=sum(n)-right, n=sum(n), .groups="drop") %>%
            mutate(pctRight=right/n, pctNaive=n/(sum(n)), lift=pctRight/pctNaive-1)
    }

    # Confusion data based on predicted target variable
    if(isTRUE(printConfPred) || isTRUE(returnData)) {
        dfConfPred <- df %>%
            group_by(across(all_of(c(tgtPred)))) %>%
            summarize(right=sum(n*correct), wrong=sum(n)-right, n=sum(n), .groups="drop") %>%
            mutate(pctRight=right/n)
    }

    # Overall confusion data (naive benchmark is the majority class count)
    if(isTRUE(printConfOverall) || isTRUE(returnData)) {
        maxNaive <- df %>%
            group_by(across(all_of(tgtOrig))) %>%
            summarize(n=sum(n), .groups="drop") %>%
            arrange(desc(n)) %>%
            slice(1) %>%
            pull(n)
        dfConfOverall <- df %>%
            summarize(right=sum(n*correct), wrong=sum(n)-right, n=sum(n), .groups="drop") %>%
            mutate(maxN=maxNaive, pctRight=right/n, pctNaive=maxN/n, lift=pctRight/pctNaive-1, nBucket=nBucket)
    }
    
    # Confusion report based on original target variable
    if(isTRUE(printConfOrig)) {
        cat("\nConfusion data based on original target variable:", tgtOrig, "\n")
        dfConfOrig %>%
            print(n=50)
    }

    # Confusion report based on predicted target variable
    if(isTRUE(printConfPred)) {
        cat("\nConfusion data based on predicted target variable:", tgtPred, "\n")
        dfConfPred %>%
            print(n=50)
    }
    
    # Overall confusion matrix
    if(isTRUE(printConfOverall)) {
        cat("\nOverall confusion matrix\n")
        dfConfOverall %>%
            print(n=50)
    }
    
    # Plot of overlaps (actual vs. predicted tile/count plot)
    if(isTRUE(plotConf)) {
        p1 <- df %>%
            group_by(across(all_of(c(tgtOrig, tgtPred, "correct")))) %>%
            summarize(n=sum(n), .groups="drop") %>%
            ggplot(aes(x=get(tgtOrig), y=get(tgtPred))) + 
            labs(x="Actual", 
                 y="Predicted", 
                 title=paste0(plotDesc, "Actual vs. predicted ", tgtOrig), 
                 subtitle=paste0("(using ", predictorVarName, ")")
                 ) + 
            geom_text(aes(label=n)) + 
            geom_tile(aes(fill=correct), alpha=0.25)
        print(p1)
    }
    
    # Return data if requested
    if(isTRUE(returnData)) list(dfConfOrig=dfConfOrig, dfConfPred=dfConfPred, dfConfOverall=dfConfOverall)
    
}


# Process for chaining predictor, applier, and confusion matrix for categorical variables
simpleOneVarChain <- function(df,
                              tgt,
                              prd,
                              mapper=NULL, 
                              rankType="last", 
                              naMethod=TRUE, 
                              printReport=TRUE, 
                              plotDesc="",
                              returnData=TRUE, 
                              includeConfData=FALSE
                              ) {

    # FUNCTION ARGUMENTS:
    # df: data frame or tibble with key elements (training or testing data set)
    # tgt: target variable
    # prd: predictor variable
    # mapper: mapping list to be applied for predictions (NULL means create one
    #         here via simpleOneVarMapper(); pass a training mapper for test data)
    # rankType: method for breaking ties of same n, passed to base::rank as ties.method=
    # naMethod: method for handling NA in ranks, passed to base::rank as na.last=    
    # printReport: boolean, should the confusion report data and plot be printed?
    # plotDesc: descriptive label to be included in front of plot title
    # returnData: boolean, should data elements be returned?
    # includeConfData: boolean, should confusion data be returned?
    
    # Create the summary of predictor-target-n
    dfFit <- simpleOneVarFit(df, tgt=tgt, prd=prd, rankType=rankType, naMethod=naMethod)     

    # Create the mapper if it does not already exist
    if(is.null(mapper)) mapper <- simpleOneVarMapper(dfFit, tgt=tgt, prd=prd)
    
    # Apply mapper to data
    dfApplied <- simpleOneVarApplyMapper(dfFit, tgt=tgt, prd=prd, mapper=mapper)

    # Create confusion data
    dfConfusion <- simpleOneVarConfusionData(dfApplied, tgtOrig=tgt, tgtPred="predicted")
    
    # Create confusion report if requested
    # (|| is the correct scalar operator for this length-1 condition)
    if(isTRUE(printReport) || isTRUE(includeConfData)) {
        dfConfReport <- simpleOneVarConfusionReport(df=dfConfusion, 
                                                    tgtOrig=tgt, 
                                                    tgtPred="predicted", 
                                                    nBucket=length(unique(dfApplied[[prd]])), 
                                                    predictorVarName=prd, 
                                                    printConf=printReport, 
                                                    plotConf=printReport,
                                                    plotDesc=plotDesc,
                                                    returnData=includeConfData
                                                    )
    }
    
    # Return data if requested
    if(isTRUE(returnData)) {
        ret <- list(dfFit=dfFit, mapper=mapper, dfApplied=dfApplied, dfConfusion=dfConfusion)
        if(isTRUE(includeConfData)) ret <- c(ret, list(dfConfData=dfConfReport))
        ret
    }
    
}


# Adds a train-test component for single variable predictions
simpleOneVarTrainTest <- function(dfTrain,
                                  dfTest,
                                  tgt,
                                  prd,
                                  rankType="last", 
                                  naMethod=TRUE, 
                                  printReport=FALSE, 
                                  includeConfData=TRUE, 
                                  returnData=TRUE
                              ) {

    # FUNCTION ARGUMENTS:
    # dfTrain: data frame or tibble with key elements (training data set)
    # dfTest: data frame or tibble with key elements (testing data set)
    # tgt: target variable
    # prd: predictor variable
    # rankType: method for breaking ties of same n, passed to base::rank as ties.method=
    # naMethod: method for handling NA in ranks, passed to base::rank as na.last=
    # printReport: boolean, should the confusion report data and plot be printed?
    # includeConfData: boolean, should confusion data be returned?
    # returnData: boolean, should data elements be returned?
    
    # Run the full chain on the training data; this also builds the mapper
    chainTrain <- simpleOneVarChain(df=dfTrain, 
                                    tgt=tgt, 
                                    prd=prd,
                                    rankType=rankType,
                                    naMethod=naMethod,
                                    printReport=printReport,
                                    plotDesc="Training data: ",
                                    returnData=TRUE,
                                    includeConfData=includeConfData
                                    )
    
    # Re-run the chain on the testing data, reusing the training mapper so test
    # predictions come from the training fit
    chainTest <- simpleOneVarChain(df=dfTest, 
                                   tgt=tgt, 
                                   prd=prd,
                                   mapper=chainTrain$mapper,
                                   rankType=rankType,
                                   naMethod=naMethod,
                                   printReport=printReport,
                                   plotDesc="Testing data: ",
                                   returnData=TRUE,
                                   includeConfData=includeConfData
                                   )
    
    # Return data if requested (element names are part of the interface)
    if(isTRUE(returnData)) list(tmpTrain=chainTrain, tmpTest=chainTest)
    
}


# Plot the means by cluster and variable for a k-means object
plotClusterMeans <- function(km, nrow=NULL, ncol=NULL, scales="fixed") {

    # FUNCTION ARGUMENTS
    # km: object returned by stats::kmeans(...)
    # nrow: number of rows for faceting (NULL means default)
    #       NOTE(review): this parameter shadows base::nrow, but the call
    #       nrow(km$centers) below still resolves to base::nrow because R skips
    #       non-function bindings when looking up a name used as a function
    # ncol: number of columns for faceting (NULL means default)
    # scales: passed to facet_wrap as scales=scales
    
    # Assess clustering by dimension: one point per (cluster, variable) mean,
    # faceted by cluster, with variables ordered by their spread across clusters
    p1 <- km$centers %>%
        tibble::as_tibble() %>%
        mutate(cluster=row_number()) %>%
        pivot_longer(cols=-c(cluster)) %>%
        ggplot(aes(x=fct_reorder(name, 
                                 value, 
                                 # With exactly 2 clusters this is the signed difference
                                 # (can be negative); otherwise the non-negative range
                                 .fun=function(a) ifelse(length(a)==2, a[2]-a[1], diff(range(a)))
                                 ), 
                   y=value
                   )
               ) + 
        geom_point(aes(color=factor(cluster))) + 
        scale_color_discrete("Cluster") + 
        facet_wrap(~factor(cluster), nrow=nrow, ncol=ncol, scales=scales) +
        labs(title=paste0("Cluster means (kmeans, centers=", nrow(km$centers), ")"), 
             x="Metric", 
             y="Cluster mean"
             ) + 
        # Dashed reference line at the median of all cluster-mean values
        geom_hline(yintercept=median(km$centers), lty=2) +
        coord_flip()
    print(p1)
    
}


# Plot percentage by cluster
plotClusterPct <- function(df, km, keyVars, nRowFacet=1, printPlot=TRUE) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame initially passed to stats::kmeans(...)
    # km: object returned by stats::kmeans(...)
    # keyVars: character vector of length 1 (y-only, x will be cl) or length 2 (x, y, cl will facet)
    # nRowFacet: number of rows for facetting (only relevant if length(keyVars) is 2)
    # printPlot: boolean, should plot be printed? (if not true, plot will be returned)
    
    # Check length of keyVars
    if(!(length(keyVars) %in% c(1, 2))) stop("\nArgument keyVars must be length-1 or length-2\n")
    
    # Share of observations in each cluster within each keyVars combination
    p1 <- df %>%
        mutate(cl=factor(km$cluster)) %>%
        group_by(across(c(all_of(keyVars), "cl"))) %>%
        summarize(n=n(), .groups="drop") %>%
        group_by(across(all_of(keyVars))) %>%
        mutate(pct=n/sum(n)) %>%
        ungroup() %>%
        ggplot() + 
        scale_fill_continuous(low="white", high="green") + 
        # Scalar if/else replaces ifelse() here: the condition is length-1, and
        # if/else is the idiomatic (and attribute-preserving) construct for that
        labs(title=paste0("Percentage by cluster (kmeans with ", nrow(km$centers), " centers)"), 
             x=if(length(keyVars)==1) "Cluster" else keyVars[1], 
             y=if(length(keyVars)==1) keyVars[1] else keyVars[2]
             )
    if(length(keyVars)==1) p1 <- p1 + geom_tile(aes(fill=pct, x=cl, y=get(keyVars[1])))
    if(length(keyVars)==2) {
        p1 <- p1 + 
            geom_tile(aes(fill=pct, x=get(keyVars[1]), y=get(keyVars[2]))) + 
            facet_wrap(~cl, nrow=nRowFacet)
    }
    
    if(isTRUE(printPlot)) print(p1)
    else return(p1)
    
}


# Run k-means (or use passed k-means object) and plot centers and percentages of observations
runKMeans <- function(df, 
                      km=NULL,
                      vars=NULL, 
                      centers=2, 
                      nStart=1L, 
                      iter.max=10L, 
                      seed=NULL, 
                      plotMeans=FALSE,
                      nrowMeans=NULL,
                      plotPct=NULL, 
                      nrowPct=1, 
                      returnKM=is.null(km)
                      ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame for clustering
    # km: k-means object (will shut off k-means processing and run as plot-only)
    # vars: variables to be used for clustering (NULL means everything in df)
    # centers: number of centers
    # nStart: passed to kmeans
    # iter.max: passed to kmeans
    # seed: seed to be set (if NULL, no seed is set)
    # plotMeans: boolean, plot variable means by cluster?
    # nrowMeans: argument passed as nrow for faceting rows in plotClusterMeans() - NULL is default ggplot2
    # plotPct: list of character vectors to be passed sequentially as keyVars to plotClusterPct()
    #          NULL means do not run
    #          pctByCluster=list(c("var1"), c("var2", "var3")) will run plotting twice
    # nrowPct: argument for faceting number of rows in plotClusterPct()
    # returnKM: boolean, should the k-means object be returned?
    
    # Set seed if requested
    if(!is.null(seed)) set.seed(seed)
    
    # Get the variable names if passed as NULL
    if(is.null(vars)) vars <- names(df)
    
    # Run the k-means process if the object has not been passed
    if(is.null(km)) {
        km <- df %>%
            select(all_of(vars)) %>% 
            kmeans(centers=centers, iter.max=iter.max, nstart=nStart)
    }

    # Assess clustering by dimension if requested
    if(isTRUE(plotMeans)) plotClusterMeans(km, nrow=nrowMeans)
    # BUG FIX: use seq_along() rather than 1:length() so a zero-length list
    # produces no plotting calls instead of iterating over c(1, 0)
    if(!is.null(plotPct)) 
        for(ctr in seq_along(plotPct)) 
            plotClusterPct(df=df, km=km, keyVars=plotPct[[ctr]], nRowFacet=nrowPct)
    
    # Return the k-means object
    if(isTRUE(returnKM)) return(km)
    
}


# Assign points to closest center of a passed k-means object
assignKMeans <- function(km, df, returnAllDistanceData=FALSE) {
    
    # FUNCTION ARGUMENTS:
    # km: a k-means object
    # df: data frame or tibble
    # returnAllDistanceData: boolean, should the distance data and clusters be returned?
    #                        TRUE returns a data frame with distances as V1, V2, ..., and cluster as cl
    #                        FALSE returns a vector of cluster assignments as integers
    
    # Select columns from df to match km (select errors if any center column is absent)
    df <- df %>% select(all_of(colnames(km$centers)))
    # BUG FIX: all.equal() returns a character description (not FALSE) on mismatch,
    # so it must be wrapped in isTRUE() before negation or `!` errors out
    if(!isTRUE(all.equal(names(df), colnames(km$centers)))) stop("\nName mismatch in clustering and frame\n")
    
    # Euclidean distance from each observation to each center
    # matrix() enforces an nrow(df) x nCenters shape even when df has a single
    # row, where sapply() would otherwise collapse the result to a plain vector
    distMat <- sapply(seq_len(nrow(km$centers)), 
                      FUN=function(x) sqrt(rowSums(sweep(as.matrix(df), 
                                                         2, 
                                                         t(as.matrix(km$centers[x,,drop=FALSE]))
                                                         )**2
                                                   )
                                           )
                      )
    distClust <- matrix(distMat, nrow=nrow(df)) %>% 
        as.data.frame() %>% 
        tibble::as_tibble() %>% 
        mutate(cl=apply(., 1, which.min))
    
    # Return the full distance frame or just the assignment vector
    if(isTRUE(returnAllDistanceData)) return(distClust)
    else return(distClust$cl)
    
}

In addition, specific functions from _v002 and _v003 are copied:

runSimpleRF <- function(df, yVar, xVars=NULL, ...) {

    # FUNCTION ARGUMENTS:
    # df: data frame containing observations
    # yVar: variable to be predicted (numeric for regression, categorical for classification)
    # xVars: predictor variables (NULL means everything in df except for yVar)
    # ...: other arguments passed to ranger::ranger
    
    # Default the predictors to every column other than the response
    if(is.null(xVars)) xVars <- setdiff(names(df), yVar)
    
    # Build the model formula from the response and predictor names
    rfFormula <- as.formula(paste0(yVar, "~", paste0(xVars, collapse="+")))
    
    # Fit the random forest on just the columns the formula needs
    ranger::ranger(rfFormula, data=df[, c(yVar, xVars)], ...)
    
}

plotRFImportance <- function(rf, 
                             impName="variable.importance", 
                             divBy=1000, 
                             plotTitle=NULL, 
                             plotData=TRUE, 
                             returnData=!isTRUE(plotData)
                             ) {
    
    # FUNCTION ARGUMENTS:
    # rf: output list from random forest with an element for importance
    # impName: name of the element to extract from rf
    # divBy: divisor for the importance variable
    # plotTitle: title for plot (NULL means use default)
    # plotData: boolean, should the importance plot be created and printed?
    # returnData: boolean, should the processed data be returned?
    
    # Default title when none is supplied
    if(is.null(plotTitle)) plotTitle <- "Importance for simple random forest"

    # Build the y-axis label, noting the divisor whenever it is not 1
    yAxisLabel <- "Variable Importance"
    if(!isTRUE(all.equal(divBy, 1))) yAxisLabel <- paste0(yAxisLabel, " (", divBy, "s)")
    
    # Named importance vector -> two-column tibble (metric, imp)
    impTbl <- rf[[impName]] %>% 
        as.data.frame() %>% 
        purrr::set_names("imp") %>% 
        rownames_to_column("metric") %>% 
        tibble::as_tibble() 
    
    # Horizontal bar chart with metrics ordered by importance
    if(isTRUE(plotData)) {
        impPlot <- impTbl %>%
            ggplot(aes(x=fct_reorder(metric, imp), y=imp/divBy)) + 
            geom_col(fill="lightblue") + 
            labs(x=NULL, y=yAxisLabel, title=plotTitle) +
            coord_flip()
        print(impPlot)
    }
    
    # Return data if requested
    if(isTRUE(returnData)) return(impTbl)
    
}

predictRF <- function(rf, df, newCol="pred", predsOnly=FALSE) {
    
    # FUNCTION ARGUMENTS:
    # rf: a trained random forest model
    # df: data frame for adding predictions
    # newCol: name for new column to be added to df
    # predsOnly: boolean, should only the vector of predictions be returned?
    #            if FALSE, a column named newCol is added to df, with df returned

    # Score the supplied data with the model
    preds <- predict(rf, data=df)$predictions
    
    # Attach predictions as the final column unless the raw vector was requested
    if(!isTRUE(predsOnly)) {
        df[newCol] <- preds
        return(df)
    }
    preds
    
}

# Update for continuous variables
reportAccuracy <- function(df, 
                           trueCol, 
                           predCol="pred", 
                           reportAcc=TRUE, 
                           rndReport=2, 
                           useLabel="requested data",
                           returnAcc=!isTRUE(reportAcc), 
                           reportR2=FALSE
                           ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame containing actual and predictions
    # trueCol: column containing true value
    # predCol: column containing predicted value
    # reportAcc: boolean, should accuracy be reported (printed to output)?
    # rndReport: number of significant digits for reporting (will be converted to percentage first)
    # useLabel: label for data to be used in reporting
    # returnAcc: boolean, should the accuracy be returned 
    #            return value is not converted to percentage, not rounded
    # reportR2: boolean, should accuracy be calculated as R-squared?
    #           (default FALSE measures as categorical)
    
    # Continuous or categorical reporting
    if(isTRUE(reportR2)) {
        # Extract the column vectors directly by name; df[[col]] is safer than
        # pull(get(col)) because it cannot be captured by a same-named column
        # in the data mask
        tc <- df[[trueCol]]
        pc <- df[[predCol]]
        mseNull <- mean((tc-mean(tc))**2)   # MSE of always predicting the mean (null model)
        msePred <- mean((tc-pc)**2)         # MSE of the supplied predictions
        r2 <- 1 - msePred/mseNull
        if(isTRUE(reportAcc)) 
            cat("\nR-squared of ", 
                useLabel, 
                " is: ", 
                round(100*r2, rndReport), 
                "% (RMSE ",
                round(sqrt(msePred), 2), 
                " vs. ", 
                round(sqrt(mseNull), 2),
                " null)\n", 
                sep=""
                )
        acc <- c("mseNull"=mseNull, "msePred"=msePred, "r2"=r2)
    } else {
        # BUG FIX: compare the column vectors, not one-column data frames;
        # mean() on the data-frame comparison relied on fragile matrix coercion
        acc <- mean(df[[trueCol]]==df[[predCol]])
        if(isTRUE(reportAcc)) 
            cat("\nAccuracy of ", useLabel, " is: ", round(100*acc, rndReport), "%\n", sep="")    
    }
    
    # Return accuracy statistic if requested
    if(isTRUE(returnAcc)) return(acc)
    
}

# Update for automated rounding
# Plot predicted vs. actual: a confusion heatmap for categorical variables or a
# count-weighted scatter with lm fit for continuous variables
plotConfusion <- function(df, 
                          trueCol, 
                          predCol="pred", 
                          useTitle=NULL,
                          useSub=NULL, 
                          plotCont=FALSE, 
                          rndTo=NULL,
                          rndBucketsAuto=100,
                          nSig=NULL,
                          refXY=FALSE
                          ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame containing actual and predictions
    # trueCol: column containing true value
    # predCol: column containing predicted value
    # useTitle: title to be used for chart (NULL means create from trueCol)
    # useSub: subtitle to be used for chart (NULL means none)
    # plotCont: boolean, should plotting assume continuous variables?
    #           (default FALSE assumes confusion plot for categorical variables)
    # rndTo: every number in x should be rounded to the nearest rndTo
    #        NULL means no rounding (default)
    #        -1L means make an estimate based on data
    # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
    # nSig: number of significant digits for automatically calculated rounding parameter
    #       (NULL means calculate exactly)
    # refXY: boolean, should a reference line for y=x be included? (relevant only for continuous)
    
    # Create title if not supplied
    if(is.null(useTitle)) useTitle <- paste0("Predicting ", trueCol)

    # Function auto-round returns vector as-is when rndTo is NULL and auto-rounds when rndTo is -1L
    # (autoRound() is defined elsewhere in this file; any other rndTo value
    # rounds both columns to the nearest multiple of rndTo)
    df <- df %>%
        mutate(across(all_of(c(trueCol, predCol)), 
                      .fns=function(x) autoRound(x, rndTo=rndTo, rndBucketsAuto=rndBucketsAuto, nSig=nSig)
                      )
               )
    
    # Create base plot (applicable to categorical or continuous variables)
    # Use x as true and y as predicted, for more meaningful geom_smooth() if continuous
    # Flip coordinates if categorical
    # Counts (n) per actual/predicted cell drive the point size or tile fill below
    p1 <- df %>%
        group_by(across(all_of(c(trueCol, predCol)))) %>%
        summarize(n=n(), .groups="drop") %>%
        ggplot(aes(y=get(predCol), x=get(trueCol))) + 
        labs(y="Predicted", x="Actual", title=useTitle, subtitle=useSub)
        
    # Update plot as appropriate
    if(isTRUE(plotCont)) {
        # Continuous: scatter sized by cell count, with a count-weighted lm fit
        p1 <- p1 +
            geom_point(aes(size=n), alpha=0.5) + 
            scale_size_continuous("# Obs") +
            geom_smooth(aes(weight=n), method="lm")
        # Dashed red y=x line provides a perfect-prediction reference
        if(isTRUE(refXY)) p1 <- p1 + geom_abline(slope=1, intercept=0, lty=2, color="red")
    } else {
        # Categorical: confusion-matrix heatmap with counts overlaid as text
        p1 <- p1 + 
            geom_tile(aes(fill=n)) + 
            geom_text(aes(label=n), size=2.5) +
            coord_flip() +
            scale_fill_continuous("", low="white", high="green")
    }
    
    # Output plot
    print(p1)
    
}

runFullRF <- function(dfTrain, 
                      yVar, 
                      xVars, 
                      dfTest=dfTrain,
                      useLabel="test data",
                      useSub=NULL, 
                      isContVar=FALSE,
                      rndTo=NULL,
                      rndBucketsAuto=100,
                      nSig=NULL,
                      refXY=FALSE,
                      makePlots=TRUE,
                      plotImp=makePlots,
                      plotConf=makePlots,
                      returnData=FALSE, 
                      ...
                      ) {
    
    # FUNCTION ARGUMENTS:
    # dfTrain: training data
    # yVar: dependent variable
    # xVars: column(s) containing independent variables
    # dfTest: test dataset for applying predictions
    # useLabel: label to be used for reporting accuracy
    # useSub: subtitle to be used for confusion chart (NULL means none)
    # isContVar: boolean, is the variable continuous? (default FALSE means categorical)
    # rndTo: every number in x should be rounded to the nearest rndTo
    #        NULL means no rounding (default)
    #        -1L means make an estimate based on data
    # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
    # nSig: number of significant digits for automatically calculated rounding parameter
    #       (NULL means calculate exactly)    
    # refXY: boolean, should a reference line for y=x be included? (relevant only for continuous)
    # makePlots: boolean, should plots be created for variable importance and confusion matrix?
    # plotImp: boolean, should variable importance be plotted? (default is makePlots)
    # plotConf: boolean, should confusion matrix be plotted? (default is makePlots)
    # returnData: boolean, should data be returned?
    # ...: additional parameters to pass to runSimpleRF(), which are then passed to ranger::ranger()

    # Step 1: fit the random forest, using impurity for variable importance
    rfModel <- runSimpleRF(df=dfTrain, yVar=yVar, xVars=xVars, importance="impurity", ...)

    # Step 2: build (and optionally display) the variable-importance table
    impData <- plotRFImportance(rfModel, plotData=plotImp, returnData=TRUE)

    # Step 3: score the test dataset
    dfPred <- predictRF(rf=rfModel, df=dfTest)

    # Step 4: report accuracy (R-squared when continuous, accuracy when categorical)
    accStat <- reportAccuracy(dfPred, 
                              trueCol=yVar, 
                              rndReport=3, 
                              useLabel=useLabel, 
                              reportR2=isTRUE(isContVar),
                              returnAcc=TRUE
                              )

    # Step 5: plot confusion/scatter data if requested
    if(isTRUE(plotConf)) {
        plotConfusion(dfPred, 
                      trueCol=yVar, 
                      useSub=useSub, 
                      plotCont=isTRUE(isContVar), 
                      rndTo=rndTo, 
                      rndBucketsAuto=rndBucketsAuto,
                      nSig=nSig,
                      refXY=refXY
                      )
    }
    
    # Step 6: return the model, importance, predictions, and accuracy if requested
    if(isTRUE(returnData)) return(list(rf=rfModel, rfImp=impData, tstPred=dfPred, rfAcc=accStat))
    
}

runPartialImportanceRF <- function(dfTrain, 
                                   yVar, 
                                   dfTest,
                                   impDB=dfImp,
                                   nImp=+Inf,
                                   otherX=c(),
                                   isContVar=TRUE, 
                                   useLabel=keyLabel, 
                                   useSub=stringr::str_to_sentence(keyLabel), 
                                   rndTo=NULL,
                                   rndBucketsAuto=50,
                                   nSig=NULL,
                                   refXY=FALSE,
                                   makePlots=FALSE, 
                                   returnElem=c("rfImp", "rfAcc")
                                   ) {
    
    # FUNCTION ARGUMENTS
    # dfTrain: training data
    # yVar: y variable in dfTrain
    # dfTest: test data
    # impDB: tibble containing variable importance by dependent variable
    #        NOTE(review): default relies on a global dfImp defined elsewhere
    #        in the analysis -- confirm it exists before calling
    # nImp: use the top nImp variables by variable importance
    # otherX: include these additional x variables
    # isContVar: boolean, is this a continuous variable (regression)? FALSE means classification
    # useLabel: label for description (default relies on global keyLabel)
    # useSub: label for plot
    # rndTo: controls the rounding parameter for plots, passed to runFullRF 
    #        (NULL means no rounding)
    #        -1L means make an estimate based on underlying data
    # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
    # nSig: number of significant digits for automatically calculated rounding parameter
    #       (NULL means calculate exactly)    
    # refXY: controls the reference line parameter for plots, passed to runFullRF
    # makePlots: boolean, should plots be created?
    # returnElem: character vector of list elements to be returned

    # Predictors: the top-nImp importance variables for yVar plus any extras
    xUse <- unique(c(impDB %>% filter(n<=nImp, src==yVar) %>% pull(metric), otherX))
    
    # Fit/evaluate the full pipeline and keep only the requested elements
    runFullRF(dfTrain=dfTrain, 
              yVar=yVar, 
              xVars=xUse, 
              dfTest=dfTest, 
              isContVar = isContVar, 
              useLabel=useLabel, 
              useSub=useSub, 
              rndTo=rndTo,
              rndBucketsAuto=rndBucketsAuto,
              nSig=nSig,
              refXY=refXY,
              makePlots=makePlots,
              returnData=TRUE
              )[returnElem]
    
}

autoRound <- function(x, rndTo=-1L, rndBucketsAuto=100, nSig=NULL) {

    # FUNCTION ARGUMENTS
    # x: vector to be rounded
    # rndTo: every number in x should be rounded to the nearest rndTo
    #        NULL means no rounding
    #        -1L means make an estimate based on data (default)
    # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
    # nSig: number of significant digits for automatically calculated rounding parameter
    #       (NULL means calculate exactly)
    
    # NULL sentinel: leave the vector untouched
    if(is.null(rndTo)) return(x)
    
    # -1L sentinel: derive a rounding increment from the data itself
    if(isTRUE(all.equal(-1L, rndTo))) {
        uniqueCount <- length(unique(x))
        # Already coarse enough (within 150% of the requested bucket count)
        if(uniqueCount <= 1.5*rndBucketsAuto) return(x)
        # Spread the observed range over roughly rndBucketsAuto buckets
        rndTo <- diff(range(x)) / rndBucketsAuto
        # Optionally truncate the increment to the requested significant digits
        if(!is.null(nSig)) rndTo <- signif(rndTo, digits=nSig)
    }
    
    # Round each value to the nearest multiple of rndTo
    round(x/rndTo)*rndTo

}


# Sweep the number of top-importance predictors, fitting a random forest for
# each count, then tabulate and plot holdout accuracy/R-squared vs. predictor count
autoPartialImportance <- function(dfTrain, 
                                  dfTest, 
                                  yVar, 
                                  isContVar,
                                  impDB=dfImp,
                                  impNums=c(1:10, 16, 25, nrow(filter(dfImp, src==yVar)))
                                  ) {
    
    # FUNCTION ARGUMENTS:
    # dfTrain: training data
    # dfTest: test (holdout) data
    # yVar: dependent variable
    # isContVar: boolean, is this a contnuous variable (R-2) or categorical variable (accuracy)?
    # impDB: tibble containing sorted variable importances by predictor
    #        NOTE(review): default relies on a global dfImp defined elsewhere
    #        in the analysis -- confirm it exists before calling
    # impNums: vector of number of variables to run (each element in vector run)
    #          (default: 1-10, 16, 25, and all variables available for yVar)
    
    # Accuracy on holdout data: one runPartialImportanceRF() fit per element of
    # impNums; for continuous variables keep only the r2 element of the stats vector
    tblRPI <- tibble::tibble(nImp=impNums, 
                             rfAcc=sapply(impNums, 
                                          FUN=function(x) {y <- runPartialImportanceRF(dfTrain=dfTrain, 
                                                                                       yVar=yVar, 
                                                                                       dfTest=dfTest, 
                                                                                       isContVar=isContVar, 
                                                                                       impDB=impDB, 
                                                                                       nImp=x, 
                                                                                       makePlots=FALSE
                                                                                       )[["rfAcc"]]
                                                           if(isTRUE(isContVar)) y <- y["r2"]
                                                           y
                                                           }
                                          )
                             )
    print(tblRPI)

    # Plot of holdout accuracy/r-squared vs. number of variables
    # if(isTRUE(isContVar)) tblRPI <- tblRPI %>% mutate(rfAcc=r2)
    if(isTRUE(isContVar)) prtDesc <- "R-squared" else prtDesc <- "Accuracy"
    # Anchor the curve at (0, 0) and mark the best score with a dashed line
    p1 <- tblRPI %>%
        select(nImp, rfAcc) %>%
        bind_rows(tibble::tibble(nImp=0, rfAcc=0)) %>%
        ggplot(aes(x=nImp, y=rfAcc)) + 
        geom_line() + 
        geom_point() + 
        labs(title=paste0(prtDesc, " on holdout data vs. number of predictors"), 
             subtitle=paste0("Predicting ", yVar),
             y=paste0(prtDesc, " on holdout data"), 
             x="# Predictors (selected in order of variable importance in full model)"
             ) + 
        lims(y=c(0, 1)) + 
        geom_hline(data=~filter(., rfAcc==max(rfAcc)), aes(yintercept=rfAcc), lty=2)
    print(p1)
    
    # Return the nImp/rfAcc table for downstream inspection
    return(tblRPI)
    
}


runNextBestPredictor <- function(varsRun, 
                                 xFix, 
                                 yVar, 
                                 isContVar,
                                 dfTrain,
                                 dfTest=dfTrain, 
                                 useLabel="predictions based on training data applied to holdout dataset",
                                 useSub=stringr::str_to_sentence(keyLabel_v3), 
                                 makePlots=FALSE
                                 ) {
    
    # FUNCTION ARGUMENTS:
    # varsRun: variables to be run as potential next-best predictors
    # xFix: variables that are already included in every test of next-best
    # yVar: dependent variable of interest
    # isContVar: boolean, is yVar continuous?
    # dfTrain: training data
    # dfTest: test data
    # useLabel: descriptive label
    # useSub: subtitle description
    #         NOTE(review): default relies on a global keyLabel_v3 defined
    #         elsewhere in the analysis -- confirm it exists before calling
    # makePlots: boolean, should plots be created for each predictor run?
    
    # Fit one model per candidate: the fixed predictors plus that single candidate
    vecAcc <- sapply(varsRun, FUN=function(candidate) {
        accOut <- runFullRF(dfTrain=dfTrain, 
                            yVar=yVar, 
                            xVars=c(xFix, candidate),
                            dfTest=dfTest, 
                            useLabel=useLabel, 
                            useSub=useSub,
                            isContVar=isContVar,
                            makePlots=makePlots,
                            returnData=TRUE
                            )[["rfAcc"]]
        # Continuous models return a named stats vector; keep only r-squared
        if(isTRUE(isContVar)) accOut[["r2"]] else accOut
        }
        )

    # Print the candidates ranked from best to worst
    vecAcc %>% 
        as.data.frame() %>% 
        purrr::set_names("rfAcc") %>% 
        rownames_to_column("pred") %>% 
        tibble::tibble() %>%
        arrange(desc(rfAcc)) %>%
        print(n=40)
    
    # Return the named accuracy vector
    vecAcc

}


getNextBestVar <- function(x, returnTbl=FALSE, n=if(isTRUE(returnTbl)) +Inf else 1) {
    
    # FUNCTION ARGUMENTS:
    # x: named vector of accuracy or r-squared
    # returnTbl: boolean, if TRUE convert to tibble and return, if FALSE return vector of top-n predictors 
    # n: number of predictors to return (+Inf will return the full tibble or vector)
    
    # Rank predictors from best to worst and keep the top n
    topTbl <- vecToTibble(x, colNameName="pred") %>%
        arrange(desc(value)) %>%
        slice_head(n=n)
    
    # Return either the ranked tibble or just the predictor names
    if(isTRUE(returnTbl)) topTbl
    else topTbl %>% pull(pred)
    
}


newCityPredict <- function(rf, 
                           dfTest, 
                           trueCol, 
                           isContVar=FALSE,
                           reportR2=isTRUE(isContVar), 
                           plotCont=isTRUE(isContVar), 
                           reportAcc=TRUE, 
                           rndReport=2, 
                           useLabel="requested data",
                           useTitle=NULL,
                           useSub=NULL, 
                           rndTo=NULL,
                           rndBucketsAuto=100,
                           nSig=NULL,
                           refXY=FALSE, 
                           returnData=TRUE
                           ) {
    
    # FUNCTION ARGUMENTS:
    # rf: The existing "ranger" model OR a list containing element "rf" that has the existing "ranger" model
    # dfTest: the new dataset for predictions
    # trueCol: column containing true value
    # isContVar: boolean, is the variable continuous? (default FALSE means categorical)
    # reportR2: boolean, should accuracy be calculated as R-squared?
    #           (FALSE measures as categorical)
    # plotCont: boolean, should plotting assume continuous variables?
    #           (FALSE assumes confusion plot for categorical variables)
    # reportAcc: boolean, should accuracy be reported (printed to output)?
    # rndReport: number of significant digits for reporting (will be converted to percentage first)
    # useLabel: label for data to be used in reporting
    # useTitle: title to be used for chart (NULL means create from trueCol)
    # useSub: subtitle to be used for chart (NULL means none)
    # rndTo: every number in x should be rounded to the nearest rndTo
    #        NULL means no rounding (default)
    #        -1L means make an estimate based on data
    # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
    # nSig: number of significant digits for automatically calculated rounding parameter
    #       (NULL means calculate exactly)
    # refXY: boolean, should a reference line for y=x be included? (relevant only for continuous)
    # returnData: boolean, should a list be returned containing tstPred and rfAcc?
    
    # Normalize rf: accept either a ranger model directly or a list wrapping one
    # as element "rf" (e.g. the list returned by runFullRF with returnData=TRUE)
    # BUG FIX: error message previously lacked the closing quote on 'ranger';
    # also use inherits() rather than class() %in% and share one message string
    errMsg <- "\nERROR: rf must be of class 'ranger' OR a list with element 'rf' that is of class 'ranger'"
    if(!inherits(rf, "ranger")) {
        if(!("rf" %in% names(rf))) stop(errMsg)
        rf <- rf[["rf"]]
        if(!inherits(rf, "ranger")) stop(errMsg)
    }
    
    # Predict on new dataset
    tstPred <- predictRF(rf=rf, df=dfTest)

    # Report on accuracy
    rfAcc <- reportAccuracy(tstPred, 
                            trueCol=trueCol, 
                            reportAcc=reportAcc,
                            rndReport=rndReport, 
                            useLabel=useLabel, 
                            reportR2=reportR2,
                            returnAcc=TRUE
                            )

    # Plot confusion data
    plotConfusion(tstPred, 
                  trueCol=trueCol, 
                  useTitle=useTitle,
                  useSub=useSub, 
                  plotCont=plotCont, 
                  rndTo=rndTo,
                  rndBucketsAuto=rndBucketsAuto,
                  nSig=nSig,
                  refXY=refXY
                  )
    
    # Return data if requested
    if(isTRUE(returnData)) return(list(tstPred=tstPred, rfAcc=rfAcc))
    
}

Key mapping tables for available metrics are also copied:

# Comma-separated lists of the hourly and daily metric names supported by the
# Open-Meteo historical weather API (split on "," below to build lookup tables)
hourlyMetrics <- "temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm"
dailyMetrics <- "weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration"

# Newline-separated descriptions, one per metric, in the same order as the
# metric lists above (split on "\n" below; the counts must match exactly)
hourlyDescription <- "Air temperature at 2 meters above ground\nRelative humidity at 2 meters above ground\nDew point temperature at 2 meters above ground\nApparent temperature is the perceived feels-like temperature combining wind chill factor, relative humidity and solar radiation\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nTotal precipitation (rain, showers, snow) sum of the preceding hour. Data is stored with a 0.1 mm precision. If precipitation data is summed up to monthly sums, there might be small inconsistencies with the total precipitation amount.\nOnly liquid precipitation of the preceding hour including local showers and rain from large scale systems.\nSnowfall amount of the preceding hour in centimeters. For the water equivalent in millimeter, divide by 7. E.g. 7 cm snow = 10 mm precipitation water equivalent\nTotal cloud cover as an area fraction\nLow level clouds and fog up to 2 km altitude\nMid level clouds from 2 to 6 km altitude\nHigh level clouds from 6 km altitude\nShortwave solar radiation as average of the preceding hour. This is equal to the total global horizontal irradiation\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDiffuse solar radiation as average of the preceding hour\nWind speed at 10 or 100 meters above ground. Wind speed on 10 meters is the standard level.\nWind speed at 10 or 100 meters above ground. 
Wind speed on 10 meters is the standard level.\nWind direction at 10 or 100 meters above ground\nWind direction at 10 or 100 meters above ground\nGusts at 10 meters above ground of the indicated hour. Wind gusts in CERRA are defined as the maximum wind gusts of the preceding hour. Please consult the ECMWF IFS documentation for more information on how wind gusts are parameterized in weather models.\nET0 Reference Evapotranspiration of a well watered grass field. Based on FAO-56 Penman-Monteith equations ET0 is calculated from temperature, wind speed, humidity and solar radiation. Unlimited soil water is assumed. ET0 is commonly used to estimate the required irrigation for plants.\nWeather condition as a numeric code. Follow WMO weather interpretation codes. See table below for details. Weather code is calculated from cloud cover analysis, precipitation and snowfall. As barely no information about atmospheric stability is available, estimation about thunderstorms is not possible.\nVapor Pressure Deificit (VPD) in kilopascal (kPa). For high VPD (>1.6), water transpiration of plants increases. For low VPD (<0.4), transpiration decreases\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths."
dailyDescription <- "The most severe weather condition on a given day\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily apparent temperature\nMaximum and minimum daily apparent temperature\nSum of daily precipitation (including rain, showers and snowfall)\nSum of daily rain\nSum of daily snowfall\nThe number of hours with rain\nSun rise and set times\nSun rise and set times\nMaximum wind speed and gusts on a day\nMaximum wind speed and gusts on a day\nDominant wind direction\nThe sum of solar radiaion on a given day in Megajoules\nDaily sum of ET0 Reference Evapotranspiration of a well watered grass field"

# Build the hourly-metric lookup table: one row per metric, where the
# comma-separated metric names pair positionally with the newline-separated
# descriptions
tblMetricsHourly <- tibble::tibble(
    metric = str_split_1(hourlyMetrics, ","),
    description = str_split_1(hourlyDescription, "\n")
)
# Show all rows (the table has 33, more than the default print limit)
print(tblMetricsHourly, n = 50)
## # A tibble: 33 × 2
##    metric                        description                                    
##    <chr>                         <chr>                                          
##  1 temperature_2m                Air temperature at 2 meters above ground       
##  2 relativehumidity_2m           Relative humidity at 2 meters above ground     
##  3 dewpoint_2m                   Dew point temperature at 2 meters above ground 
##  4 apparent_temperature          Apparent temperature is the perceived feels-li…
##  5 pressure_msl                  Atmospheric air pressure reduced to mean sea l…
##  6 surface_pressure              Atmospheric air pressure reduced to mean sea l…
##  7 precipitation                 Total precipitation (rain, showers, snow) sum …
##  8 rain                          Only liquid precipitation of the preceding hou…
##  9 snowfall                      Snowfall amount of the preceding hour in centi…
## 10 cloudcover                    Total cloud cover as an area fraction          
## 11 cloudcover_low                Low level clouds and fog up to 2 km altitude   
## 12 cloudcover_mid                Mid level clouds from 2 to 6 km altitude       
## 13 cloudcover_high               High level clouds from 6 km altitude           
## 14 shortwave_radiation           Shortwave solar radiation as average of the pr…
## 15 direct_radiation              Direct solar radiation as average of the prece…
## 16 direct_normal_irradiance      Direct solar radiation as average of the prece…
## 17 diffuse_radiation             Diffuse solar radiation as average of the prec…
## 18 windspeed_10m                 Wind speed at 10 or 100 meters above ground. W…
## 19 windspeed_100m                Wind speed at 10 or 100 meters above ground. W…
## 20 winddirection_10m             Wind direction at 10 or 100 meters above ground
## 21 winddirection_100m            Wind direction at 10 or 100 meters above ground
## 22 windgusts_10m                 Gusts at 10 meters above ground of the indicat…
## 23 et0_fao_evapotranspiration    ET0 Reference Evapotranspiration of a well wat…
## 24 weathercode                   Weather condition as a numeric code. Follow WM…
## 25 vapor_pressure_deficit        Vapor Pressure Deificit (VPD) in kilopascal (k…
## 26 soil_temperature_0_to_7cm     Average temperature of different soil levels b…
## 27 soil_temperature_7_to_28cm    Average temperature of different soil levels b…
## 28 soil_temperature_28_to_100cm  Average temperature of different soil levels b…
## 29 soil_temperature_100_to_255cm Average temperature of different soil levels b…
## 30 soil_moisture_0_to_7cm        Average soil water content as volumetric mixin…
## 31 soil_moisture_7_to_28cm       Average soil water content as volumetric mixin…
## 32 soil_moisture_28_to_100cm     Average soil water content as volumetric mixin…
## 33 soil_moisture_100_to_255cm    Average soil water content as volumetric mixin…
# Build the daily-metric lookup table, pairing metric names with their
# descriptions positionally (same construction as the hourly table)
tblMetricsDaily <- tibble::tibble(
    metric = str_split_1(dailyMetrics, ","),
    description = str_split_1(dailyDescription, "\n")
)
print(tblMetricsDaily)
## # A tibble: 16 × 2
##    metric                     description                                       
##    <chr>                      <chr>                                             
##  1 weathercode                The most severe weather condition on a given day  
##  2 temperature_2m_max         Maximum and minimum daily air temperature at 2 me…
##  3 temperature_2m_min         Maximum and minimum daily air temperature at 2 me…
##  4 apparent_temperature_max   Maximum and minimum daily apparent temperature    
##  5 apparent_temperature_min   Maximum and minimum daily apparent temperature    
##  6 precipitation_sum          Sum of daily precipitation (including rain, showe…
##  7 rain_sum                   Sum of daily rain                                 
##  8 snowfall_sum               Sum of daily snowfall                             
##  9 precipitation_hours        The number of hours with rain                     
## 10 sunrise                    Sun rise and set times                            
## 11 sunset                     Sun rise and set times                            
## 12 windspeed_10m_max          Maximum wind speed and gusts on a day             
## 13 windgusts_10m_max          Maximum wind speed and gusts on a day             
## 14 winddirection_10m_dominant Dominant wind direction                           
## 15 shortwave_radiation_sum    The sum of solar radiaion on a given day in Megaj…
## 16 et0_fao_evapotranspiration Daily sum of ET0 Reference Evapotranspiration of …

A helper function is written to read saved Open-Meteo JSON files and optionally add derived variables for later use:

# Read a saved Open-Meteo JSON file and optionally augment one of its tables
# with derived calendar variables and percentile ranks for exploration.
formatOpenMeteoJSON <- function(x, 
                                glimpseData=TRUE, 
                                addVars=FALSE, 
                                addExtract="tblHourly", 
                                showStats=addVars
                                ) {
    
    # FUNCTION ARGUMENTS:
    # x: saved JSON file location, for passage to readOpenMeteoJSON
    # glimpseData: boolean, should a glimpse of the file and metadata be shown?
    # addVars: boolean, should derived variables be added for later processing?
    # addExtract: list element to be extracted (relevant only for addVars=TRUE)
    # showStats: boolean, should counts of key elements be shown (relevant only
    #   for addVars=TRUE); defaults to the value passed for addVars
    #
    # RETURNS: the full list from readOpenMeteoJSON when addVars=FALSE;
    #   otherwise the addExtract element as a tibble with derived columns added

    # Read file (readOpenMeteoJSON is a project helper defined in _v001)
    lst <- readOpenMeteoJSON(x)
    
    # Show a glimpse if requested
    if(isTRUE(glimpseData)) {
        print(lst)
        prettyOpenMeteoMeta(lst)
    }
    
    # If no variables to be added, return the file
    if(!isTRUE(addVars)) return(lst)
    
    # Add statistics.
    # NOTE: statement order inside mutate() matters -- tod and season are first
    # created as character vectors, todSeason is pasted from them, and only
    # then are all three converted to factors with explicit level ordering.
    df <- lst[[addExtract]] %>%
        mutate(year=year(date), 
               month=factor(month.abb[lubridate::month(date)], levels=month.abb), 
               hour=lubridate::hour(time), 
               fct_hour=factor(hour), 
               # "Day" is defined as hours 7 through 18 inclusive
               tod=ifelse(hour>=7 & hour<=18, "Day", "Night"), 
               doy=yday(date),
               season=case_when(month %in% c("Mar", "Apr", "May") ~ "Spring", 
                                month %in% c("Jun", "Jul", "Aug") ~ "Summer", 
                                month %in% c("Sep", "Oct", "Nov") ~ "Fall", 
                                month %in% c("Dec", "Jan", "Feb") ~ "Winter", 
                                # Catch-all to surface any unexpected month value
                                TRUE~"typo"
                                ), 
               todSeason=paste0(season, "-", tod), 
               tod=factor(tod, levels=c("Day", "Night")), 
               season=factor(season, levels=c("Spring", "Summer", "Fall", "Winter")), 
               todSeason=factor(todSeason, 
                                levels=paste0(rep(c("Spring", "Summer", "Fall", "Winter"), each=2), 
                                              "-", 
                                              c("Day", "Night")
                                              )
                                ),
               # Rounded 0-100 percentile rank of every numeric column,
               # stored alongside the original as pct_<column>
               across(where(is.numeric), .fns=function(x) round(100*percent_rank(x)), .names="pct_{.col}")
               )
    
    # Show counts if requested
    if(isTRUE(showStats)) {
        # Glimpse file
        glimpse(df)
        # Counts of day-of-year/month (weighted boxplots: doy spread per month)
        p1 <- df %>% 
            count(doy, month) %>% 
            ggplot(aes(y=doy, x=month)) + 
            geom_boxplot(aes(weight=n), fill="lightblue") + 
            labs(title="Observations by day-of-year and month", x=NULL, y="Day of Year")
        print(p1)
        # Counts of year/month (heatmap of record counts, labeled per cell)
        p2 <- df %>% 
            count(year, month) %>% 
            ggplot(aes(y=factor(year), x=month)) + 
            geom_tile(aes(fill=n)) + 
            geom_text(aes(label=n), size=3) + 
            scale_fill_continuous("# Records", low="white", high="green") + 
            labs(title="Records by year and month", x=NULL, y=NULL)
        print(p2)
        # Counts of todSeason-season-tod, hour-fct_hour-tod, and month-season
        # (cross-tabulations to sanity-check the derived factor columns)
        df %>% count(todSeason, season, tod) %>% print()
        df %>% count(hour, fct_hour, tod) %>% print(n=30)
        df %>% count(month, season) %>% print()
    }
    
    # Return the file
    df
    
}

Core daily datasets are loaded for the four cities (New York, Los Angeles, Chicago, and Houston):

# Read daily JSON file for New York City (glimpse only; no derived variables)
nycOMDaily <- formatOpenMeteoJSON("testOM_daily_nyc.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily 
## 
## $tblDaily
## # A tibble: 4,914 × 18
##    date       time       weathercode temperature_2m_max temperature_2m_min
##    <date>     <chr>            <int>              <dbl>              <dbl>
##  1 2010-01-01 2010-01-01          73                5                 -1.4
##  2 2010-01-02 2010-01-02          71               -0.6               -9.2
##  3 2010-01-03 2010-01-03          71               -4.8              -10  
##  4 2010-01-04 2010-01-04           1               -0.8               -7.3
##  5 2010-01-05 2010-01-05           1               -0.2               -7.3
##  6 2010-01-06 2010-01-06           2                1.1               -5.3
##  7 2010-01-07 2010-01-07           2                3.6               -3.7
##  8 2010-01-08 2010-01-08          71                1.9               -5.7
##  9 2010-01-09 2010-01-09           0               -1.4               -7.7
## 10 2010-01-10 2010-01-10           0               -1.7              -10.3
## # ℹ 4,904 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## #   apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## #   snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## #   windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## #   winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## #   et0_fao_evapotranspiration <dbl>
## 
## $tblHourly
## NULL
## 
## $tblUnits
## # A tibble: 17 × 4
##    metricType  name                       value      description                
##    <chr>       <chr>                      <chr>      <chr>                      
##  1 daily_units time                       "iso8601"  <NA>                       
##  2 daily_units weathercode                "wmo code" The most severe weather co…
##  3 daily_units temperature_2m_max         "deg C"    Maximum and minimum daily …
##  4 daily_units temperature_2m_min         "deg C"    Maximum and minimum daily …
##  5 daily_units apparent_temperature_max   "deg C"    Maximum and minimum daily …
##  6 daily_units apparent_temperature_min   "deg C"    Maximum and minimum daily …
##  7 daily_units precipitation_sum          "mm"       Sum of daily precipitation…
##  8 daily_units rain_sum                   "mm"       Sum of daily rain          
##  9 daily_units snowfall_sum               "cm"       Sum of daily snowfall      
## 10 daily_units precipitation_hours        "h"        The number of hours with r…
## 11 daily_units sunrise                    "iso8601"  Sun rise and set times     
## 12 daily_units sunset                     "iso8601"  Sun rise and set times     
## 13 daily_units windspeed_10m_max          "km/h"     Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max          "km/h"     Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg "     Dominant wind direction    
## 16 daily_units shortwave_radiation_sum    "MJ/m²"    The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm"       Daily sum of ET0 Reference…
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone        
##      <dbl>     <dbl>             <dbl>              <int> <chr>           
## 1     40.7     -73.9              101.             -14400 America/New_York
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 40.7
## longitude: -73.9
## generationtime_ms: 100.914
## utc_offset_seconds: -14400
## timezone: America/New_York
## timezone_abbreviation: EDT
## elevation: 36
laxOMDaily <- formatOpenMeteoJSON("testOM_daily_lax.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily 
## 
## $tblDaily
## # A tibble: 5,113 × 18
##    date       time       weathercode temperature_2m_max temperature_2m_min
##    <date>     <chr>            <int>              <dbl>              <dbl>
##  1 2010-01-01 2010-01-01           2               20.1                4.7
##  2 2010-01-02 2010-01-02           1               23.2                6.7
##  3 2010-01-03 2010-01-03           1               23                  6.5
##  4 2010-01-04 2010-01-04           2               22.1                6.5
##  5 2010-01-05 2010-01-05           1               22.9                5  
##  6 2010-01-06 2010-01-06           2               23.2                7.7
##  7 2010-01-07 2010-01-07           1               23.3                5.2
##  8 2010-01-08 2010-01-08           1               22.8                8.4
##  9 2010-01-09 2010-01-09           2               21.5                7.2
## 10 2010-01-10 2010-01-10           1               24                  7.5
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## #   apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## #   snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## #   windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## #   winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## #   et0_fao_evapotranspiration <dbl>
## 
## $tblHourly
## NULL
## 
## $tblUnits
## # A tibble: 17 × 4
##    metricType  name                       value      description                
##    <chr>       <chr>                      <chr>      <chr>                      
##  1 daily_units time                       "iso8601"  <NA>                       
##  2 daily_units weathercode                "wmo code" The most severe weather co…
##  3 daily_units temperature_2m_max         "deg C"    Maximum and minimum daily …
##  4 daily_units temperature_2m_min         "deg C"    Maximum and minimum daily …
##  5 daily_units apparent_temperature_max   "deg C"    Maximum and minimum daily …
##  6 daily_units apparent_temperature_min   "deg C"    Maximum and minimum daily …
##  7 daily_units precipitation_sum          "mm"       Sum of daily precipitation…
##  8 daily_units rain_sum                   "mm"       Sum of daily rain          
##  9 daily_units snowfall_sum               "cm"       Sum of daily snowfall      
## 10 daily_units precipitation_hours        "h"        The number of hours with r…
## 11 daily_units sunrise                    "iso8601"  Sun rise and set times     
## 12 daily_units sunset                     "iso8601"  Sun rise and set times     
## 13 daily_units windspeed_10m_max          "km/h"     Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max          "km/h"     Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg "     Dominant wind direction    
## 16 daily_units shortwave_radiation_sum    "MJ/m²"    The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm"       Daily sum of ET0 Reference…
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone           
##      <dbl>     <dbl>             <dbl>              <int> <chr>              
## 1     34.1     -118.              58.9             -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 34.13005
## longitude: -118.4981
## generationtime_ms: 58.85398
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 333
chiOMDaily <- formatOpenMeteoJSON("testOM_daily_chi.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily 
## 
## $tblDaily
## # A tibble: 5,113 × 18
##    date       time       weathercode temperature_2m_max temperature_2m_min
##    <date>     <chr>            <int>              <dbl>              <dbl>
##  1 2010-01-01 2010-01-01           3               -8.6              -13.4
##  2 2010-01-02 2010-01-02           2              -10.4              -15.1
##  3 2010-01-03 2010-01-03           3               -7.9              -13.8
##  4 2010-01-04 2010-01-04           3               -6.9              -12.3
##  5 2010-01-05 2010-01-05           3               -4.8               -9.8
##  6 2010-01-06 2010-01-06          71               -4.9               -9  
##  7 2010-01-07 2010-01-07          73               -5.2               -8.5
##  8 2010-01-08 2010-01-08          73               -3                 -9.4
##  9 2010-01-09 2010-01-09           3               -5.8              -12.3
## 10 2010-01-10 2010-01-10           3               -8.8              -19.4
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## #   apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## #   snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## #   windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## #   winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## #   et0_fao_evapotranspiration <dbl>
## 
## $tblHourly
## NULL
## 
## $tblUnits
## # A tibble: 17 × 4
##    metricType  name                       value      description                
##    <chr>       <chr>                      <chr>      <chr>                      
##  1 daily_units time                       "iso8601"  <NA>                       
##  2 daily_units weathercode                "wmo code" The most severe weather co…
##  3 daily_units temperature_2m_max         "deg C"    Maximum and minimum daily …
##  4 daily_units temperature_2m_min         "deg C"    Maximum and minimum daily …
##  5 daily_units apparent_temperature_max   "deg C"    Maximum and minimum daily …
##  6 daily_units apparent_temperature_min   "deg C"    Maximum and minimum daily …
##  7 daily_units precipitation_sum          "mm"       Sum of daily precipitation…
##  8 daily_units rain_sum                   "mm"       Sum of daily rain          
##  9 daily_units snowfall_sum               "cm"       Sum of daily snowfall      
## 10 daily_units precipitation_hours        "h"        The number of hours with r…
## 11 daily_units sunrise                    "iso8601"  Sun rise and set times     
## 12 daily_units sunset                     "iso8601"  Sun rise and set times     
## 13 daily_units windspeed_10m_max          "km/h"     Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max          "km/h"     Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg "     Dominant wind direction    
## 16 daily_units shortwave_radiation_sum    "MJ/m²"    The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm"       Daily sum of ET0 Reference…
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone       
##      <dbl>     <dbl>             <dbl>              <int> <chr>          
## 1     41.9     -87.6              59.4             -18000 America/Chicago
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 41.86292
## longitude: -87.64877
## generationtime_ms: 59.38601
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
houOMDaily <- formatOpenMeteoJSON("testOM_daily_hou.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily 
## 
## $tblDaily
## # A tibble: 5,113 × 18
##    date       time       weathercode temperature_2m_max temperature_2m_min
##    <date>     <chr>            <int>              <dbl>              <dbl>
##  1 2010-01-01 2010-01-01           3               11.8                3.9
##  2 2010-01-02 2010-01-02           1               12                  0.7
##  3 2010-01-03 2010-01-03           3               10                  4.4
##  4 2010-01-04 2010-01-04           3                7.6                1.8
##  5 2010-01-05 2010-01-05           0                8                 -1.9
##  6 2010-01-06 2010-01-06          51               12.7               -0.1
##  7 2010-01-07 2010-01-07          55               13.4               -0.2
##  8 2010-01-08 2010-01-08           2                0.8               -3  
##  9 2010-01-09 2010-01-09           0                4.4               -5.5
## 10 2010-01-10 2010-01-10           0                5.9               -4.6
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## #   apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## #   snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## #   windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## #   winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## #   et0_fao_evapotranspiration <dbl>
## 
## $tblHourly
## NULL
## 
## $tblUnits
## # A tibble: 17 × 4
##    metricType  name                       value      description                
##    <chr>       <chr>                      <chr>      <chr>                      
##  1 daily_units time                       "iso8601"  <NA>                       
##  2 daily_units weathercode                "wmo code" The most severe weather co…
##  3 daily_units temperature_2m_max         "deg C"    Maximum and minimum daily …
##  4 daily_units temperature_2m_min         "deg C"    Maximum and minimum daily …
##  5 daily_units apparent_temperature_max   "deg C"    Maximum and minimum daily …
##  6 daily_units apparent_temperature_min   "deg C"    Maximum and minimum daily …
##  7 daily_units precipitation_sum          "mm"       Sum of daily precipitation…
##  8 daily_units rain_sum                   "mm"       Sum of daily rain          
##  9 daily_units snowfall_sum               "cm"       Sum of daily snowfall      
## 10 daily_units precipitation_hours        "h"        The number of hours with r…
## 11 daily_units sunrise                    "iso8601"  Sun rise and set times     
## 12 daily_units sunset                     "iso8601"  Sun rise and set times     
## 13 daily_units windspeed_10m_max          "km/h"     Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max          "km/h"     Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg "     Dominant wind direction    
## 16 daily_units shortwave_radiation_sum    "MJ/m²"    The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm"       Daily sum of ET0 Reference…
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone  
##      <dbl>     <dbl>             <dbl>              <int> <chr>     
## 1     29.8     -95.4              64.0             -18000 US/Central
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 29.77153
## longitude: -95.43555
## generationtime_ms: 63.96198
## utc_offset_seconds: -18000
## timezone: US/Central
## timezone_abbreviation: CDT
## elevation: 17

Processed hourly data for NYC and LA are loaded:

# Read hourly JSON file for NYC with derived variables added; addVars=TRUE
# also triggers the showStats summaries by default (the LA hourly file is
# presumably loaded in a later chunk -- only the NYC load is visible here)
nycTemp <- formatOpenMeteoJSON("testOM_hourly_nyc.json", addVars=TRUE)
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly 
## 
## $tblDaily
## NULL
## 
## $tblHourly
## # A tibble: 117,936 × 37
##    time                date        hour temperature_2m relativehumidity_2m
##    <dttm>              <date>     <int>          <dbl>               <int>
##  1 2010-01-01 00:00:00 2010-01-01     0           -1.1                  95
##  2 2010-01-01 01:00:00 2010-01-01     1           -1                    96
##  3 2010-01-01 02:00:00 2010-01-01     2           -1                    96
##  4 2010-01-01 03:00:00 2010-01-01     3           -0.8                  97
##  5 2010-01-01 04:00:00 2010-01-01     4           -0.9                  97
##  6 2010-01-01 05:00:00 2010-01-01     5           -0.8                  97
##  7 2010-01-01 06:00:00 2010-01-01     6           -0.7                  97
##  8 2010-01-01 07:00:00 2010-01-01     7           -0.5                  97
##  9 2010-01-01 08:00:00 2010-01-01     8           -0.6                  97
## 10 2010-01-01 09:00:00 2010-01-01     9           -0.6                  97
## # ℹ 117,926 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
## 
## $tblUnits
## # A tibble: 34 × 4
##    metricType   name                 value   description                        
##    <chr>        <chr>                <chr>   <chr>                              
##  1 hourly_units time                 iso8601 <NA>                               
##  2 hourly_units temperature_2m       deg C   Air temperature at 2 meters above …
##  3 hourly_units relativehumidity_2m  %       Relative humidity at 2 meters abov…
##  4 hourly_units dewpoint_2m          deg C   Dew point temperature at 2 meters …
##  5 hourly_units apparent_temperature deg C   Apparent temperature is the percei…
##  6 hourly_units pressure_msl         hPa     Atmospheric air pressure reduced t…
##  7 hourly_units surface_pressure     hPa     Atmospheric air pressure reduced t…
##  8 hourly_units precipitation        mm      Total precipitation (rain, showers…
##  9 hourly_units rain                 mm      Only liquid precipitation of the p…
## 10 hourly_units snowfall             cm      Snowfall amount of the preceding h…
## # ℹ 24 more rows
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone        
##      <dbl>     <dbl>             <dbl>              <int> <chr>           
## 1     40.7     -73.9              118.             -14400 America/New_York
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 40.7
## longitude: -73.9
## generationtime_ms: 118.0021
## utc_offset_seconds: -14400
## timezone: America/New_York
## timezone_abbreviation: EDT
## elevation: 36
## 
## Rows: 117,936
## Columns: 80
## $ time                              <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date                              <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour                              <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m                    <dbl> -1.1, -1.0, -1.0, -0.8, -0.9, -0.8, …
## $ relativehumidity_2m               <int> 95, 96, 96, 97, 97, 97, 97, 97, 97, …
## $ dewpoint_2m                       <dbl> -1.7, -1.6, -1.6, -1.2, -1.3, -1.2, …
## $ apparent_temperature              <dbl> -3.9, -3.9, -3.9, -3.7, -3.7, -3.6, …
## $ pressure_msl                      <dbl> 1017.2, 1016.5, 1015.9, 1015.6, 1015…
## $ surface_pressure                  <dbl> 1012.6, 1011.9, 1011.3, 1011.0, 1011…
## $ precipitation                     <dbl> 0.5, 0.5, 0.4, 0.3, 0.1, 0.0, 0.0, 0…
## $ rain                              <dbl> 0.0, 0.1, 0.1, 0.1, 0.0, 0.0, 0.0, 0…
## $ snowfall                          <dbl> 0.35, 0.28, 0.21, 0.14, 0.07, 0.00, …
## $ cloudcover                        <int> 90, 93, 80, 68, 71, 100, 100, 100, 1…
## $ cloudcover_low                    <int> 2, 8, 3, 6, 15, 51, 99, 99, 96, 77, …
## $ cloudcover_mid                    <int> 98, 96, 99, 98, 95, 97, 98, 99, 94, …
## $ cloudcover_high                   <int> 97, 93, 59, 13, 0, 0, 0, 0, 0, 0, 0,…
## $ shortwave_radiation               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 53, 11…
## $ direct_radiation                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 20…
## $ direct_normal_irradiance          <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 41, 93…
## $ windspeed_10m                     <dbl> 3.1, 3.5, 3.3, 3.9, 3.5, 3.4, 0.0, 1…
## $ windspeed_100m                    <dbl> 3.8, 3.1, 3.8, 4.7, 6.4, 5.7, 1.4, 1…
## $ winddirection_10m                 <int> 339, 336, 347, 338, 336, 342, 180, 2…
## $ winddirection_100m                <int> 41, 21, 17, 356, 344, 342, 360, 217,…
## $ windgusts_10m                     <dbl> 9.0, 9.7, 10.1, 7.6, 7.6, 6.8, 5.4, …
## $ et0_fao_evapotranspiration        <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, …
## $ weathercode                       <int> 73, 73, 73, 71, 71, 3, 3, 3, 3, 3, 3…
## $ vapor_pressure_deficit            <dbl> 0.03, 0.02, 0.02, 0.02, 0.02, 0.02, …
## $ soil_temperature_0_to_7cm         <dbl> -0.7, -0.7, -0.7, -0.6, -0.6, -0.6, …
## $ soil_temperature_7_to_28cm        <dbl> 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0…
## $ soil_temperature_28_to_100cm      <dbl> 4.2, 4.2, 4.1, 4.1, 4.1, 4.1, 4.1, 4…
## $ soil_temperature_100_to_255cm     <dbl> 10.6, 10.6, 10.6, 10.6, 10.6, 10.6, …
## $ soil_moisture_0_to_7cm            <dbl> 0.373, 0.374, 0.376, 0.377, 0.377, 0…
## $ soil_moisture_7_to_28cm           <dbl> 0.377, 0.377, 0.377, 0.377, 0.377, 0…
## $ soil_moisture_28_to_100cm         <dbl> 0.413, 0.413, 0.413, 0.413, 0.413, 0…
## $ soil_moisture_100_to_255cm        <dbl> 0.412, 0.412, 0.412, 0.412, 0.412, 0…
## $ origTime                          <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year                              <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month                             <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour                          <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod                               <fct> Night, Night, Night, Night, Night, N…
## $ doy                               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season                            <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason                         <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour                          <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m                <dbl> 10, 10, 10, 11, 11, 11, 11, 12, 11, …
## $ pct_relativehumidity_2m           <dbl> 92, 94, 94, 96, 96, 96, 96, 96, 96, …
## $ pct_dewpoint_2m                   <dbl> 23, 24, 24, 25, 25, 25, 25, 25, 25, …
## $ pct_apparent_temperature          <dbl> 15, 15, 15, 15, 15, 15, 17, 17, 16, …
## $ pct_pressure_msl                  <dbl> 53, 49, 46, 44, 44, 41, 38, 36, 37, …
## $ pct_surface_pressure              <dbl> 51, 47, 44, 42, 42, 39, 36, 35, 36, …
## $ pct_precipitation                 <dbl> 93, 93, 92, 90, 86, 0, 0, 0, 0, 0, 0…
## $ pct_rain                          <dbl> 0, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall                      <dbl> 99, 99, 99, 99, 98, 0, 0, 0, 0, 0, 0…
## $ pct_cloudcover                    <dbl> 77, 79, 72, 67, 68, 81, 81, 81, 81, …
## $ pct_cloudcover_low                <dbl> 51, 60, 53, 58, 65, 77, 90, 90, 88, …
## $ pct_cloudcover_mid                <dbl> 90, 89, 92, 90, 88, 89, 90, 92, 87, …
## $ pct_cloudcover_high               <dbl> 81, 76, 63, 49, 0, 0, 0, 0, 0, 0, 0,…
## $ pct_shortwave_radiation           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 57, 6…
## $ pct_direct_radiation              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 62…
## $ pct_direct_normal_irradiance      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 61…
## $ pct_diffuse_radiation             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 58, 7…
## $ pct_windspeed_10m                 <dbl> 3, 4, 3, 5, 4, 4, 0, 1, 2, 5, 8, 8, …
## $ pct_windspeed_100m                <dbl> 2, 1, 2, 3, 6, 5, 0, 0, 4, 9, 9, 8, …
## $ pct_winddirection_10m             <dbl> 94, 93, 96, 94, 93, 95, 35, 43, 53, …
## $ pct_winddirection_100m            <dbl> 8, 4, 3, 99, 96, 95, 100, 46, 51, 61…
## $ pct_windgusts_10m                 <dbl> 3, 4, 5, 1, 1, 1, 0, 0, 0, 1, 2, 4, …
## $ pct_et0_fao_evapotranspiration    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 32, 4…
## $ pct_weathercode                   <dbl> 99, 99, 99, 98, 98, 69, 69, 69, 69, …
## $ pct_vapor_pressure_deficit        <dbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 8, …
## $ pct_soil_temperature_0_to_7cm     <dbl> 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10,…
## $ pct_soil_temperature_7_to_28cm    <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, …
## $ pct_soil_temperature_28_to_100cm  <dbl> 16, 16, 15, 15, 15, 15, 15, 15, 15, …
## $ pct_soil_temperature_100_to_255cm <dbl> 42, 42, 42, 42, 42, 42, 42, 42, 42, …
## $ pct_soil_moisture_0_to_7cm        <dbl> 70, 71, 73, 74, 74, 74, 74, 74, 73, …
## $ pct_soil_moisture_7_to_28cm       <dbl> 69, 69, 69, 69, 69, 68, 68, 68, 68, …
## $ pct_soil_moisture_28_to_100cm     <dbl> 96, 96, 96, 96, 96, 96, 96, 96, 96, …
## $ pct_soil_moisture_100_to_255cm    <dbl> 96, 96, 96, 96, 96, 96, 96, 96, 96, …
## $ pct_year                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

## # A tibble: 8 × 4
##   todSeason    season tod       n
##   <fct>        <fct>  <fct> <int>
## 1 Spring-Day   Spring Day   15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day   Summer Day   14532
## 4 Summer-Night Summer Night 14532
## 5 Fall-Day     Fall   Day   14196
## 6 Fall-Night   Fall   Night 14196
## 7 Winter-Day   Winter Day   14784
## 8 Winter-Night Winter Night 14784
## # A tibble: 24 × 4
##     hour fct_hour tod       n
##    <int> <fct>    <fct> <int>
##  1     0 0        Night  4914
##  2     1 1        Night  4914
##  3     2 2        Night  4914
##  4     3 3        Night  4914
##  5     4 4        Night  4914
##  6     5 5        Night  4914
##  7     6 6        Night  4914
##  8     7 7        Day    4914
##  9     8 8        Day    4914
## 10     9 9        Day    4914
## 11    10 10       Day    4914
## 12    11 11       Day    4914
## 13    12 12       Day    4914
## 14    13 13       Day    4914
## 15    14 14       Day    4914
## 16    15 15       Day    4914
## 17    16 16       Day    4914
## 18    17 17       Day    4914
## 19    18 18       Day    4914
## 20    19 19       Night  4914
## 21    20 20       Night  4914
## 22    21 21       Night  4914
## 23    22 22       Night  4914
## 24    23 23       Night  4914
## # A tibble: 12 × 3
##    month season     n
##    <fct> <fct>  <int>
##  1 Jan   Winter 10416
##  2 Feb   Winter  9480
##  3 Mar   Spring 10416
##  4 Apr   Spring 10080
##  5 May   Spring 10416
##  6 Jun   Summer  9720
##  7 Jul   Summer  9672
##  8 Aug   Summer  9672
##  9 Sep   Fall    9360
## 10 Oct   Fall    9672
## 11 Nov   Fall    9360
## 12 Dec   Winter  9672
# Load cached Open-Meteo hourly JSON for Los Angeles (lat 34.13, lon -118.50
# per the printed description). addVars=TRUE appends derived columns — year,
# month, fct_hour, tod, doy, season, todSeason, and pct_* percentile columns —
# as shown in the glimpse output below. formatOpenMeteoJSON is presumably the
# _v001 helper referenced in the file header — confirm against that source.
laxTemp <- formatOpenMeteoJSON("testOM_hourly_lax.json", addVars=TRUE)
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly 
## 
## $tblDaily
## NULL
## 
## $tblHourly
## # A tibble: 122,712 × 37
##    time                date        hour temperature_2m relativehumidity_2m
##    <dttm>              <date>     <int>          <dbl>               <int>
##  1 2010-01-01 00:00:00 2010-01-01     0            6.3                  60
##  2 2010-01-01 01:00:00 2010-01-01     1            5.7                  62
##  3 2010-01-01 02:00:00 2010-01-01     2            5.3                  63
##  4 2010-01-01 03:00:00 2010-01-01     3            5                    64
##  5 2010-01-01 04:00:00 2010-01-01     4            4.8                  64
##  6 2010-01-01 05:00:00 2010-01-01     5            4.7                  64
##  7 2010-01-01 06:00:00 2010-01-01     6            4.7                  64
##  8 2010-01-01 07:00:00 2010-01-01     7            4.8                  64
##  9 2010-01-01 08:00:00 2010-01-01     8            5.2                  64
## 10 2010-01-01 09:00:00 2010-01-01     9            6.3                  63
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
## 
## $tblUnits
## # A tibble: 34 × 4
##    metricType   name                 value   description                        
##    <chr>        <chr>                <chr>   <chr>                              
##  1 hourly_units time                 iso8601 <NA>                               
##  2 hourly_units temperature_2m       deg C   Air temperature at 2 meters above …
##  3 hourly_units relativehumidity_2m  %       Relative humidity at 2 meters abov…
##  4 hourly_units dewpoint_2m          deg C   Dew point temperature at 2 meters …
##  5 hourly_units apparent_temperature deg C   Apparent temperature is the percei…
##  6 hourly_units pressure_msl         hPa     Atmospheric air pressure reduced t…
##  7 hourly_units surface_pressure     hPa     Atmospheric air pressure reduced t…
##  8 hourly_units precipitation        mm      Total precipitation (rain, showers…
##  9 hourly_units rain                 mm      Only liquid precipitation of the p…
## 10 hourly_units snowfall             cm      Snowfall amount of the preceding h…
## # ℹ 24 more rows
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone           
##      <dbl>     <dbl>             <dbl>              <int> <chr>              
## 1     34.1     -118.             6196.             -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 34.13005
## longitude: -118.4981
## generationtime_ms: 6196.377
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 333
## 
## Rows: 122,712
## Columns: 80
## $ time                              <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date                              <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour                              <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m                    <dbl> 6.3, 5.7, 5.3, 5.0, 4.8, 4.7, 4.7, 4…
## $ relativehumidity_2m               <int> 60, 62, 63, 64, 64, 64, 64, 64, 64, …
## $ dewpoint_2m                       <dbl> -0.9, -1.0, -1.2, -1.3, -1.4, -1.4, …
## $ apparent_temperature              <dbl> 2.9, 2.3, 1.8, 1.3, 1.0, 0.9, 0.9, 1…
## $ pressure_msl                      <dbl> 1026.5, 1026.1, 1025.7, 1025.7, 1024…
## $ surface_pressure                  <dbl> 985.7, 985.2, 984.8, 984.7, 983.9, 9…
## $ precipitation                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover                        <int> 14, 21, 23, 29, 31, 30, 29, 30, 31, …
## $ cloudcover_low                    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover_mid                    <int> 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 2, 6, …
## $ cloudcover_high                   <int> 48, 71, 78, 95, 100, 99, 98, 99, 100…
## $ shortwave_radiation               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 142, …
## $ direct_radiation                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 27, 16…
## $ direct_normal_irradiance          <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 115, …
## $ windspeed_10m                     <dbl> 7.4, 7.8, 8.0, 9.7, 9.7, 10.1, 10.0,…
## $ windspeed_100m                    <dbl> 10.4, 10.6, 11.0, 14.9, 14.8, 14.6, …
## $ winddirection_10m                 <int> 14, 13, 10, 15, 15, 17, 15, 13, 13, …
## $ winddirection_100m                <int> 20, 24, 19, 20, 18, 20, 18, 18, 16, …
## $ windgusts_10m                     <dbl> 19.1, 19.1, 19.4, 19.8, 20.9, 21.6, …
## $ et0_fao_evapotranspiration        <dbl> 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, …
## $ weathercode                       <int> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ vapor_pressure_deficit            <dbl> 0.38, 0.35, 0.33, 0.31, 0.31, 0.31, …
## $ soil_temperature_0_to_7cm         <dbl> 7.0, 6.6, 6.2, 5.8, 5.6, 5.4, 5.3, 5…
## $ soil_temperature_7_to_28cm        <dbl> 10.8, 10.6, 10.3, 10.1, 9.9, 9.7, 9.…
## $ soil_temperature_28_to_100cm      <dbl> 12.9, 12.9, 12.9, 12.9, 12.9, 12.9, …
## $ soil_temperature_100_to_255cm     <dbl> 20.5, 20.5, 20.5, 20.5, 20.5, 20.5, …
## $ soil_moisture_0_to_7cm            <dbl> 0.205, 0.205, 0.205, 0.205, 0.205, 0…
## $ soil_moisture_7_to_28cm           <dbl> 0.251, 0.251, 0.251, 0.250, 0.250, 0…
## $ soil_moisture_28_to_100cm         <dbl> 0.168, 0.168, 0.168, 0.168, 0.168, 0…
## $ soil_moisture_100_to_255cm        <dbl> 0.165, 0.165, 0.165, 0.165, 0.165, 0…
## $ origTime                          <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year                              <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month                             <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour                          <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod                               <fct> Night, Night, Night, Night, Night, N…
## $ doy                               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season                            <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason                         <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour                          <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m                <dbl> 4, 3, 3, 2, 2, 2, 2, 2, 3, 4, 12, 34…
## $ pct_relativehumidity_2m           <dbl> 52, 54, 55, 57, 57, 57, 57, 57, 57, …
## $ pct_dewpoint_2m                   <dbl> 15, 15, 15, 14, 14, 14, 14, 14, 15, …
## $ pct_apparent_temperature          <dbl> 4, 3, 3, 2, 2, 2, 2, 2, 2, 4, 10, 28…
## $ pct_pressure_msl                  <dbl> 100, 100, 99, 99, 99, 99, 98, 98, 98…
## $ pct_surface_pressure              <dbl> 99, 99, 99, 99, 98, 98, 97, 97, 97, …
## $ pct_precipitation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover                    <dbl> 58, 63, 65, 71, 75, 73, 71, 73, 75, …
## $ pct_cloudcover_low                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover_mid                <dbl> 0, 0, 0, 0, 76, 0, 0, 0, 78, 80, 78,…
## $ pct_cloudcover_high               <dbl> 80, 84, 85, 91, 96, 95, 94, 95, 96, …
## $ pct_shortwave_radiation           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 59, 6…
## $ pct_direct_radiation              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 55, 6…
## $ pct_direct_normal_irradiance      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 54, 6…
## $ pct_diffuse_radiation             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 86, 9…
## $ pct_windspeed_10m                 <dbl> 61, 64, 65, 77, 77, 79, 79, 79, 79, …
## $ pct_windspeed_100m                <dbl> 60, 61, 63, 81, 80, 80, 79, 79, 78, …
## $ pct_winddirection_10m             <dbl> 6, 5, 3, 7, 7, 8, 7, 5, 5, 5, 7, 9, …
## $ pct_winddirection_100m            <dbl> 8, 10, 8, 8, 7, 8, 7, 7, 6, 4, 4, 4,…
## $ pct_windgusts_10m                 <dbl> 51, 51, 52, 53, 56, 58, 58, 59, 58, …
## $ pct_et0_fao_evapotranspiration    <dbl> 34, 34, 34, 34, 34, 34, 34, 34, 34, …
## $ pct_weathercode                   <dbl> 0, 63, 63, 63, 63, 63, 63, 63, 63, 6…
## $ pct_vapor_pressure_deficit        <dbl> 31, 29, 28, 27, 27, 27, 26, 27, 27, …
## $ pct_soil_temperature_0_to_7cm     <dbl> 3, 3, 2, 2, 2, 1, 1, 1, 1, 2, 5, 15,…
## $ pct_soil_temperature_7_to_28cm    <dbl> 6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 2, …
## $ pct_soil_temperature_28_to_100cm  <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, …
## $ pct_soil_temperature_100_to_255cm <dbl> 64, 64, 64, 64, 64, 64, 64, 64, 64, …
## $ pct_soil_moisture_0_to_7cm        <dbl> 83, 83, 83, 83, 83, 83, 83, 83, 83, …
## $ pct_soil_moisture_7_to_28cm       <dbl> 87, 87, 87, 87, 87, 87, 87, 87, 87, …
## $ pct_soil_moisture_28_to_100cm     <dbl> 56, 56, 56, 56, 56, 56, 56, 56, 56, …
## $ pct_soil_moisture_100_to_255cm    <dbl> 34, 34, 34, 34, 34, 34, 34, 34, 34, …
## $ pct_year                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

## # A tibble: 8 × 4
##   todSeason    season tod       n
##   <fct>        <fct>  <fct> <int>
## 1 Spring-Day   Spring Day   15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day   Summer Day   15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day     Fall   Day   15288
## 6 Fall-Night   Fall   Night 15288
## 7 Winter-Day   Winter Day   15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
##     hour fct_hour tod       n
##    <int> <fct>    <fct> <int>
##  1     0 0        Night  5113
##  2     1 1        Night  5113
##  3     2 2        Night  5113
##  4     3 3        Night  5113
##  5     4 4        Night  5113
##  6     5 5        Night  5113
##  7     6 6        Night  5113
##  8     7 7        Day    5113
##  9     8 8        Day    5113
## 10     9 9        Day    5113
## 11    10 10       Day    5113
## 12    11 11       Day    5113
## 13    12 12       Day    5113
## 14    13 13       Day    5113
## 15    14 14       Day    5113
## 16    15 15       Day    5113
## 17    16 16       Day    5113
## 18    17 17       Day    5113
## 19    18 18       Day    5113
## 20    19 19       Night  5113
## 21    20 20       Night  5113
## 22    21 21       Night  5113
## 23    22 22       Night  5113
## 24    23 23       Night  5113
## # A tibble: 12 × 3
##    month season     n
##    <fct> <fct>  <int>
##  1 Jan   Winter 10416
##  2 Feb   Winter  9480
##  3 Mar   Spring 10416
##  4 Apr   Spring 10080
##  5 May   Spring 10416
##  6 Jun   Summer 10080
##  7 Jul   Summer 10416
##  8 Aug   Summer 10416
##  9 Sep   Fall   10080
## 10 Oct   Fall   10416
## 11 Nov   Fall   10080
## 12 Dec   Winter 10416

Next, processed hourly data for Chicago and Houston are loaded with the same `formatOpenMeteoJSON()` helper used for Los Angeles:

# Read hourly JSON file (CHI and HOU)
# Chicago: parse the cached Open-Meteo hourly JSON (lat 41.86, lon -87.65 per
# the printed description). addVars=TRUE adds the derived time/season factors
# (year, month, fct_hour, tod, doy, season, todSeason) and pct_* percentile
# columns visible in the glimpse output that follows.
chiTemp <- formatOpenMeteoJSON("testOM_hourly_chi.json", addVars=TRUE)
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly 
## 
## $tblDaily
## NULL
## 
## $tblHourly
## # A tibble: 122,712 × 37
##    time                date        hour temperature_2m relativehumidity_2m
##    <dttm>              <date>     <int>          <dbl>               <int>
##  1 2010-01-01 00:00:00 2010-01-01     0           -9.5                  67
##  2 2010-01-01 01:00:00 2010-01-01     1           -9.8                  69
##  3 2010-01-01 02:00:00 2010-01-01     2          -10.3                  73
##  4 2010-01-01 03:00:00 2010-01-01     3          -10.8                  74
##  5 2010-01-01 04:00:00 2010-01-01     4          -11.3                  75
##  6 2010-01-01 05:00:00 2010-01-01     5          -11.8                  76
##  7 2010-01-01 06:00:00 2010-01-01     6          -12.3                  77
##  8 2010-01-01 07:00:00 2010-01-01     7          -12.8                  78
##  9 2010-01-01 08:00:00 2010-01-01     8          -13.2                  79
## 10 2010-01-01 09:00:00 2010-01-01     9          -13.4                  78
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
## 
## $tblUnits
## # A tibble: 34 × 4
##    metricType   name                 value   description                        
##    <chr>        <chr>                <chr>   <chr>                              
##  1 hourly_units time                 iso8601 <NA>                               
##  2 hourly_units temperature_2m       deg C   Air temperature at 2 meters above …
##  3 hourly_units relativehumidity_2m  %       Relative humidity at 2 meters abov…
##  4 hourly_units dewpoint_2m          deg C   Dew point temperature at 2 meters …
##  5 hourly_units apparent_temperature deg C   Apparent temperature is the percei…
##  6 hourly_units pressure_msl         hPa     Atmospheric air pressure reduced t…
##  7 hourly_units surface_pressure     hPa     Atmospheric air pressure reduced t…
##  8 hourly_units precipitation        mm      Total precipitation (rain, showers…
##  9 hourly_units rain                 mm      Only liquid precipitation of the p…
## 10 hourly_units snowfall             cm      Snowfall amount of the preceding h…
## # ℹ 24 more rows
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone       
##      <dbl>     <dbl>             <dbl>              <int> <chr>          
## 1     41.9     -87.6             4476.             -18000 America/Chicago
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 41.86292
## longitude: -87.64877
## generationtime_ms: 4476.2
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
## 
## Rows: 122,712
## Columns: 80
## $ time                              <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date                              <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour                              <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m                    <dbl> -9.5, -9.8, -10.3, -10.8, -11.3, -11…
## $ relativehumidity_2m               <int> 67, 69, 73, 74, 75, 76, 77, 78, 79, …
## $ dewpoint_2m                       <dbl> -14.4, -14.4, -14.2, -14.5, -14.8, -…
## $ apparent_temperature              <dbl> -15.8, -16.3, -16.8, -17.2, -17.7, -…
## $ pressure_msl                      <dbl> 1024.4, 1024.7, 1025.3, 1025.8, 1026…
## $ surface_pressure                  <dbl> 1000.8, 1001.1, 1001.6, 1002.1, 1002…
## $ precipitation                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover                        <int> 62, 47, 20, 15, 15, 19, 25, 22, 22, …
## $ cloudcover_low                    <int> 69, 52, 22, 17, 17, 21, 28, 25, 25, …
## $ cloudcover_mid                    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, …
## $ cloudcover_high                   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ shortwave_radiation               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 119, …
## $ direct_radiation                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 69, 14…
## $ direct_normal_irradiance          <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 50, 7…
## $ windspeed_10m                     <dbl> 18.7, 20.1, 19.9, 19.5, 19.0, 19.4, …
## $ windspeed_100m                    <dbl> 25.9, 28.4, 29.2, 29.8, 30.1, 30.0, …
## $ winddirection_10m                 <int> 298, 291, 290, 289, 289, 288, 287, 2…
## $ winddirection_100m                <int> 299, 294, 294, 295, 295, 294, 295, 2…
## $ windgusts_10m                     <dbl> 33.8, 32.4, 34.2, 33.1, 31.3, 31.7, …
## $ et0_fao_evapotranspiration        <dbl> 0.02, 0.01, 0.01, 0.01, 0.01, 0.01, …
## $ weathercode                       <int> 2, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, …
## $ vapor_pressure_deficit            <dbl> 0.10, 0.09, 0.08, 0.07, 0.06, 0.06, …
## $ soil_temperature_0_to_7cm         <dbl> -1.5, -1.6, -1.8, -1.9, -2.1, -2.3, …
## $ soil_temperature_7_to_28cm        <dbl> -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, …
## $ soil_temperature_28_to_100cm      <dbl> 2.4, 2.4, 2.4, 2.4, 2.3, 2.3, 2.3, 2…
## $ soil_temperature_100_to_255cm     <dbl> 9.0, 9.0, 9.0, 9.0, 8.9, 8.9, 8.9, 8…
## $ soil_moisture_0_to_7cm            <dbl> 0.295, 0.295, 0.294, 0.294, 0.294, 0…
## $ soil_moisture_7_to_28cm           <dbl> 0.300, 0.300, 0.300, 0.300, 0.300, 0…
## $ soil_moisture_28_to_100cm         <dbl> 0.334, 0.334, 0.334, 0.334, 0.334, 0…
## $ soil_moisture_100_to_255cm        <dbl> 0.310, 0.310, 0.310, 0.310, 0.311, 0…
## $ origTime                          <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year                              <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month                             <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour                          <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod                               <fct> Night, Night, Night, Night, Night, N…
## $ doy                               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season                            <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason                         <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour                          <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m                <dbl> 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, …
## $ pct_relativehumidity_2m           <dbl> 33, 37, 46, 48, 50, 52, 55, 57, 59, …
## $ pct_dewpoint_2m                   <dbl> 4, 4, 5, 4, 4, 4, 4, 4, 3, 3, 3, 4, …
## $ pct_apparent_temperature          <dbl> 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, …
## $ pct_pressure_msl                  <dbl> 84, 85, 86, 88, 89, 89, 90, 91, 91, …
## $ pct_surface_pressure              <dbl> 80, 81, 83, 85, 85, 86, 87, 89, 89, …
## $ pct_precipitation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover                    <dbl> 62, 55, 33, 30, 30, 33, 37, 35, 35, …
## $ pct_cloudcover_low                <dbl> 77, 74, 66, 64, 64, 66, 68, 67, 67, …
## $ pct_cloudcover_mid                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0,…
## $ pct_cloudcover_high               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_shortwave_radiation           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 63, 7…
## $ pct_direct_radiation              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 69, 7…
## $ pct_direct_normal_irradiance      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 76, 8…
## $ pct_diffuse_radiation             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 59, 6…
## $ pct_windspeed_10m                 <dbl> 66, 72, 71, 70, 68, 69, 65, 63, 59, …
## $ pct_windspeed_100m                <dbl> 59, 67, 69, 71, 72, 72, 67, 63, 61, …
## $ pct_winddirection_10m             <dbl> 87, 85, 84, 84, 84, 84, 83, 83, 83, …
## $ pct_winddirection_100m            <dbl> 86, 85, 85, 85, 85, 85, 85, 85, 84, …
## $ pct_windgusts_10m                 <dbl> 69, 65, 70, 67, 62, 63, 63, 61, 59, …
## $ pct_et0_fao_evapotranspiration    <dbl> 27, 16, 16, 16, 16, 16, 16, 16, 16, …
## $ pct_weathercode                   <dbl> 55, 34, 0, 0, 0, 0, 34, 34, 34, 0, 3…
## $ pct_vapor_pressure_deficit        <dbl> 17, 15, 12, 10, 7, 7, 5, 5, 5, 5, 5,…
## $ pct_soil_temperature_0_to_7cm     <dbl> 9, 8, 7, 6, 6, 5, 4, 3, 3, 2, 2, 2, …
## $ pct_soil_temperature_7_to_28cm    <dbl> 11, 11, 11, 11, 11, 11, 11, 11, 11, …
## $ pct_soil_temperature_28_to_100cm  <dbl> 18, 18, 18, 18, 18, 18, 18, 18, 18, …
## $ pct_soil_temperature_100_to_255cm <dbl> 40, 40, 40, 40, 40, 40, 40, 40, 40, …
## $ pct_soil_moisture_0_to_7cm        <dbl> 80, 80, 80, 80, 80, 80, 80, 80, 80, …
## $ pct_soil_moisture_7_to_28cm       <dbl> 84, 84, 84, 84, 84, 84, 84, 84, 84, …
## $ pct_soil_moisture_28_to_100cm     <dbl> 99, 99, 99, 99, 99, 99, 99, 98, 98, …
## $ pct_soil_moisture_100_to_255cm    <dbl> 85, 85, 85, 85, 86, 86, 86, 86, 86, …
## $ pct_year                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

## # A tibble: 8 × 4
##   todSeason    season tod       n
##   <fct>        <fct>  <fct> <int>
## 1 Spring-Day   Spring Day   15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day   Summer Day   15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day     Fall   Day   15288
## 6 Fall-Night   Fall   Night 15288
## 7 Winter-Day   Winter Day   15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
##     hour fct_hour tod       n
##    <int> <fct>    <fct> <int>
##  1     0 0        Night  5113
##  2     1 1        Night  5113
##  3     2 2        Night  5113
##  4     3 3        Night  5113
##  5     4 4        Night  5113
##  6     5 5        Night  5113
##  7     6 6        Night  5113
##  8     7 7        Day    5113
##  9     8 8        Day    5113
## 10     9 9        Day    5113
## 11    10 10       Day    5113
## 12    11 11       Day    5113
## 13    12 12       Day    5113
## 14    13 13       Day    5113
## 15    14 14       Day    5113
## 16    15 15       Day    5113
## 17    16 16       Day    5113
## 18    17 17       Day    5113
## 19    18 18       Day    5113
## 20    19 19       Night  5113
## 21    20 20       Night  5113
## 22    21 21       Night  5113
## 23    22 22       Night  5113
## 24    23 23       Night  5113
## # A tibble: 12 × 3
##    month season     n
##    <fct> <fct>  <int>
##  1 Jan   Winter 10416
##  2 Feb   Winter  9480
##  3 Mar   Spring 10416
##  4 Apr   Spring 10080
##  5 May   Spring 10416
##  6 Jun   Summer 10080
##  7 Jul   Summer 10416
##  8 Aug   Summer 10416
##  9 Sep   Fall   10080
## 10 Oct   Fall   10416
## 11 Nov   Fall   10080
## 12 Dec   Winter 10416
# Houston: same loader and addVars=TRUE option as the LAX and CHI calls, so
# the resulting tibble carries the identical 80-column schema (raw metrics
# plus derived time/season factors and pct_* percentiles) for comparison.
houTemp <- formatOpenMeteoJSON("testOM_hourly_hou.json", addVars=TRUE)
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly 
## 
## $tblDaily
## NULL
## 
## $tblHourly
## # A tibble: 122,712 × 37
##    time                date        hour temperature_2m relativehumidity_2m
##    <dttm>              <date>     <int>          <dbl>               <int>
##  1 2010-01-01 00:00:00 2010-01-01     0           10.9                  93
##  2 2010-01-01 01:00:00 2010-01-01     1            9.9                  92
##  3 2010-01-01 02:00:00 2010-01-01     2            8.6                  88
##  4 2010-01-01 03:00:00 2010-01-01     3            7.7                  86
##  5 2010-01-01 04:00:00 2010-01-01     4            7.2                  85
##  6 2010-01-01 05:00:00 2010-01-01     5            6.8                  84
##  7 2010-01-01 06:00:00 2010-01-01     6            6.4                  82
##  8 2010-01-01 07:00:00 2010-01-01     7            5.9                  83
##  9 2010-01-01 08:00:00 2010-01-01     8            5.6                  83
## 10 2010-01-01 09:00:00 2010-01-01     9            5.5                  82
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
## 
## $tblUnits
## # A tibble: 34 × 4
##    metricType   name                 value   description                        
##    <chr>        <chr>                <chr>   <chr>                              
##  1 hourly_units time                 iso8601 <NA>                               
##  2 hourly_units temperature_2m       deg C   Air temperature at 2 meters above …
##  3 hourly_units relativehumidity_2m  %       Relative humidity at 2 meters abov…
##  4 hourly_units dewpoint_2m          deg C   Dew point temperature at 2 meters …
##  5 hourly_units apparent_temperature deg C   Apparent temperature is the percei…
##  6 hourly_units pressure_msl         hPa     Atmospheric air pressure reduced t…
##  7 hourly_units surface_pressure     hPa     Atmospheric air pressure reduced t…
##  8 hourly_units precipitation        mm      Total precipitation (rain, showers…
##  9 hourly_units rain                 mm      Only liquid precipitation of the p…
## 10 hourly_units snowfall             cm      Snowfall amount of the preceding h…
## # ℹ 24 more rows
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone  
##      <dbl>     <dbl>             <dbl>              <int> <chr>     
## 1     29.8     -95.4             3762.             -18000 US/Central
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 29.77153
## longitude: -95.43555
## generationtime_ms: 3762.283
## utc_offset_seconds: -18000
## timezone: US/Central
## timezone_abbreviation: CDT
## elevation: 17
## 
## Rows: 122,712
## Columns: 80
## $ time                              <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date                              <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour                              <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m                    <dbl> 10.9, 9.9, 8.6, 7.7, 7.2, 6.8, 6.4, …
## $ relativehumidity_2m               <int> 93, 92, 88, 86, 85, 84, 82, 83, 83, …
## $ dewpoint_2m                       <dbl> 9.8, 8.6, 6.7, 5.6, 4.8, 4.2, 3.6, 3…
## $ apparent_temperature              <dbl> 7.4, 5.7, 4.1, 3.2, 2.9, 2.4, 2.2, 1…
## $ pressure_msl                      <dbl> 1025.2, 1025.9, 1026.8, 1027.1, 1027…
## $ surface_pressure                  <dbl> 1023.1, 1023.8, 1024.7, 1025.0, 1025…
## $ precipitation                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover                        <int> 90, 90, 88, 88, 89, 89, 86, 80, 90, …
## $ cloudcover_low                    <int> 100, 100, 98, 98, 99, 99, 96, 89, 10…
## $ cloudcover_mid                    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover_high                   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ shortwave_radiation               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 89, 1…
## $ direct_radiation                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 28, 58…
## $ direct_normal_irradiance          <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 61, 1…
## $ windspeed_10m                     <dbl> 24.0, 25.9, 25.3, 23.5, 20.9, 20.7, …
## $ windspeed_100m                    <dbl> 37.4, 39.1, 38.4, 35.4, 32.0, 31.2, …
## $ winddirection_10m                 <int> 330, 333, 336, 339, 341, 340, 347, 3…
## $ winddirection_100m                <int> 332, 334, 337, 341, 343, 341, 347, 3…
## $ windgusts_10m                     <dbl> 44.3, 46.1, 46.8, 44.3, 41.0, 37.8, …
## $ et0_fao_evapotranspiration        <dbl> 0.00, 0.01, 0.01, 0.01, 0.02, 0.02, …
## $ weathercode                       <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, …
## $ vapor_pressure_deficit            <dbl> 0.10, 0.10, 0.14, 0.14, 0.16, 0.16, …
## $ soil_temperature_0_to_7cm         <dbl> 11.9, 11.5, 11.0, 10.5, 10.1, 9.8, 9…
## $ soil_temperature_7_to_28cm        <dbl> 12.3, 12.3, 12.2, 12.2, 12.1, 12.0, …
## $ soil_temperature_28_to_100cm      <dbl> 14.2, 14.2, 14.2, 14.2, 14.2, 14.2, …
## $ soil_temperature_100_to_255cm     <dbl> 20.9, 20.9, 20.9, 20.9, 20.9, 20.9, …
## $ soil_moisture_0_to_7cm            <dbl> 0.462, 0.462, 0.462, 0.462, 0.462, 0…
## $ soil_moisture_7_to_28cm           <dbl> 0.474, 0.474, 0.474, 0.474, 0.473, 0…
## $ soil_moisture_28_to_100cm         <dbl> 0.498, 0.498, 0.498, 0.498, 0.498, 0…
## $ soil_moisture_100_to_255cm        <dbl> 0.453, 0.453, 0.453, 0.453, 0.453, 0…
## $ origTime                          <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year                              <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month                             <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour                          <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod                               <fct> Night, Night, Night, Night, Night, N…
## $ doy                               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season                            <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason                         <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour                          <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m                <dbl> 12, 10, 8, 6, 6, 5, 5, 4, 4, 4, 4, 5…
## $ pct_relativehumidity_2m           <dbl> 80, 77, 67, 63, 61, 59, 55, 57, 57, …
## $ pct_dewpoint_2m                   <dbl> 23, 21, 17, 15, 13, 12, 11, 10, 9, 9…
## $ pct_apparent_temperature          <dbl> 11, 9, 6, 5, 5, 4, 4, 4, 4, 3, 3, 4,…
## $ pct_pressure_msl                  <dbl> 92, 93, 94, 95, 96, 97, 97, 97, 97, …
## $ pct_surface_pressure              <dbl> 92, 93, 94, 95, 96, 97, 97, 97, 98, …
## $ pct_precipitation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover                    <dbl> 80, 80, 79, 79, 79, 79, 78, 76, 80, …
## $ pct_cloudcover_low                <dbl> 89, 89, 87, 87, 88, 88, 86, 84, 89, …
## $ pct_cloudcover_mid                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover_high               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_shortwave_radiation           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 59, 6…
## $ pct_direct_radiation              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 61, 6…
## $ pct_direct_normal_irradiance      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 63, 6…
## $ pct_diffuse_radiation             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 60, 7…
## $ pct_windspeed_10m                 <dbl> 95, 97, 96, 94, 90, 89, 83, 79, 78, …
## $ pct_windspeed_100m                <dbl> 96, 97, 97, 95, 90, 89, 82, 78, 76, …
## $ pct_winddirection_10m             <dbl> 91, 92, 92, 93, 93, 93, 95, 98, 96, …
## $ pct_winddirection_100m            <dbl> 92, 92, 93, 94, 94, 94, 96, 99, 97, …
## $ pct_windgusts_10m                 <dbl> 94, 96, 96, 94, 91, 87, 87, 84, 77, …
## $ pct_et0_fao_evapotranspiration    <dbl> 0, 24, 24, 24, 32, 32, 32, 24, 24, 3…
## $ pct_weathercode                   <dbl> 69, 69, 69, 69, 69, 69, 69, 69, 69, …
## $ pct_vapor_pressure_deficit        <dbl> 10, 10, 16, 16, 19, 19, 20, 19, 19, …
## $ pct_soil_temperature_0_to_7cm     <dbl> 10, 9, 8, 7, 6, 6, 5, 4, 4, 4, 4, 5,…
## $ pct_soil_temperature_7_to_28cm    <dbl> 6, 6, 6, 6, 6, 6, 5, 5, 5, 4, 4, 4, …
## $ pct_soil_temperature_28_to_100cm  <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ pct_soil_temperature_100_to_255cm <dbl> 38, 38, 38, 38, 38, 38, 38, 38, 38, …
## $ pct_soil_moisture_0_to_7cm        <dbl> 82, 82, 82, 82, 82, 82, 82, 82, 82, …
## $ pct_soil_moisture_7_to_28cm       <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ pct_soil_moisture_28_to_100cm     <dbl> 98, 98, 98, 98, 98, 98, 98, 98, 98, …
## $ pct_soil_moisture_100_to_255cm    <dbl> 82, 82, 82, 82, 82, 82, 82, 82, 82, …
## $ pct_year                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

## # A tibble: 8 × 4
##   todSeason    season tod       n
##   <fct>        <fct>  <fct> <int>
## 1 Spring-Day   Spring Day   15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day   Summer Day   15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day     Fall   Day   15288
## 6 Fall-Night   Fall   Night 15288
## 7 Winter-Day   Winter Day   15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
##     hour fct_hour tod       n
##    <int> <fct>    <fct> <int>
##  1     0 0        Night  5113
##  2     1 1        Night  5113
##  3     2 2        Night  5113
##  4     3 3        Night  5113
##  5     4 4        Night  5113
##  6     5 5        Night  5113
##  7     6 6        Night  5113
##  8     7 7        Day    5113
##  9     8 8        Day    5113
## 10     9 9        Day    5113
## 11    10 10       Day    5113
## 12    11 11       Day    5113
## 13    12 12       Day    5113
## 14    13 13       Day    5113
## 15    14 14       Day    5113
## 16    15 15       Day    5113
## 17    16 16       Day    5113
## 18    17 17       Day    5113
## 19    18 18       Day    5113
## 20    19 19       Night  5113
## 21    20 20       Night  5113
## 22    21 21       Night  5113
## 23    22 22       Night  5113
## 24    23 23       Night  5113
## # A tibble: 12 × 3
##    month season     n
##    <fct> <fct>  <int>
##  1 Jan   Winter 10416
##  2 Feb   Winter  9480
##  3 Mar   Spring 10416
##  4 Apr   Spring 10080
##  5 May   Spring 10416
##  6 Jun   Summer 10080
##  7 Jul   Summer 10416
##  8 Aug   Summer 10416
##  9 Sep   Fall   10080
## 10 Oct   Fall   10416
## 11 Nov   Fall   10080
## 12 Dec   Winter 10416

An integrated set of all-city test and train data is created:

# Bind all the city data frames into one table, tagged by source city
allCity <- list("NYC"=nycTemp, 
                "LA"=laxTemp, 
                "Chicago"=chiTemp, 
                "Houston"=houTemp
                ) %>%
    bind_rows(.id="src")

# Create the index for training data (70% sample, fixed seed for reproducibility)
# seq_len() avoids the 1:nrow() footgun (c(1, 0) when nrow is 0) and draws the
# identical sample as 1:nrow() under the same seed
set.seed(24061512)
idxTrain <- sample(seq_len(nrow(allCity)), size = round(0.7*nrow(allCity)), replace=FALSE)

# Add test-train flag to full dataset, plus a factor copy of src for modeling
allCity <- allCity %>%
    mutate(tt=ifelse(row_number() %in% idxTrain, "train", "test"), 
           fct_src=factor(src))
allCity
## # A tibble: 486,072 × 83
##    src   time                date        hour temperature_2m relativehumidity_2m
##    <chr> <dttm>              <date>     <int>          <dbl>               <int>
##  1 NYC   2010-01-01 00:00:00 2010-01-01     0           -1.1                  95
##  2 NYC   2010-01-01 01:00:00 2010-01-01     1           -1                    96
##  3 NYC   2010-01-01 02:00:00 2010-01-01     2           -1                    96
##  4 NYC   2010-01-01 03:00:00 2010-01-01     3           -0.8                  97
##  5 NYC   2010-01-01 04:00:00 2010-01-01     4           -0.9                  97
##  6 NYC   2010-01-01 05:00:00 2010-01-01     5           -0.8                  97
##  7 NYC   2010-01-01 06:00:00 2010-01-01     6           -0.7                  97
##  8 NYC   2010-01-01 07:00:00 2010-01-01     7           -0.5                  97
##  9 NYC   2010-01-01 08:00:00 2010-01-01     8           -0.6                  97
## 10 NYC   2010-01-01 09:00:00 2010-01-01     9           -0.6                  97
## # ℹ 486,062 more rows
## # ℹ 77 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
# Review counts by year
allCity %>% 
    count(year, src, tt) %>% 
    pivot_wider(id_cols=c("src", "tt"), names_from="year", values_from="n")
## # A tibble: 8 × 16
##   src     tt    `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018`
##   <chr>   <chr>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
## 1 Chicago test    2555   2660   2671   2667   2612   2648   2550   2567   2648
## 2 Chicago train   6205   6100   6113   6093   6148   6112   6234   6193   6112
## 3 Houston test    2666   2562   2671   2621   2695   2639   2595   2688   2631
## 4 Houston train   6094   6198   6113   6139   6065   6121   6189   6072   6129
## 5 LA      test    2638   2653   2679   2591   2645   2634   2648   2579   2729
## 6 LA      train   6122   6107   6105   6169   6115   6126   6136   6181   6031
## 7 NYC     test    2644   2648   2579   2627   2645   2577   2603   2589   2618
## 8 NYC     train   6116   6112   6205   6133   6115   6183   6181   6171   6142
## # ℹ 5 more variables: `2019` <int>, `2020` <int>, `2021` <int>, `2022` <int>,
## #   `2023` <int>

Distributions of several key variables are explored:

# Key metrics whose distributions will be compared across cities
keyVars <- c("temperature_2m", 
             "relativehumidity_2m", 
             "dewpoint_2m", 
             "shortwave_radiation", 
             "vapor_pressure_deficit", 
             "soil_temperature_28_to_100cm", 
             "soil_temperature_100_to_255cm", 
             "soil_moisture_28_to_100cm", 
             "soil_moisture_100_to_255cm"
             )

# One boxplot per metric (free y scales), colored by city
allCity %>%
    colSelector(vecSelect=c("src", keyVars)) %>%
    pivot_longer(cols=-src) %>%
    ggplot(aes(x=src, y=value)) + 
    geom_boxplot(aes(fill=src)) + 
    facet_wrap(~name, scales="free_y") + 
    scale_fill_discrete(NULL) + 
    labs(x=NULL, y=NULL, title="Distribution of Key Metrics by City")

In addition, pair plots by city are created for several combinations of variables:

# Variables for pairwise city-comparison scatter plots
keyVars <- c('pressure_msl', 
             'surface_pressure', 
             'soil_temperature_100_to_255cm', 
             'soil_moisture_100_to_255cm'
             )

# One scatter plot per unordered pair of keyVars
for(intCtr in seq_len(length(keyVars)-1)) {
    for(intCtr2 in (intCtr+1):length(keyVars)) {
        pairVars <- keyVars[c(intCtr, intCtr2)]
        p1 <- allCity %>%
            # Coarsen values so observations collapse into countable buckets
            mutate(across(c("pressure_msl", "surface_pressure", "soil_temperature_100_to_255cm"), 
                          .fns=function(x) round(x*2)/2
                          ), 
                   soil_moisture_100_to_255cm=round(soil_moisture_100_to_255cm, 2)
                   ) %>%
            colSelector(vecSelect=c("src", pairVars)) %>%
            # all_of() makes the external character-vector selection explicit;
            # .groups="drop" already returns ungrouped data, so the former
            # trailing ungroup() was redundant and is removed
            group_by(across(all_of(c("src", pairVars)))) %>%
            summarize(n=n(), .groups="drop") %>%
            # .data[[ ]] is the supported tidy-eval way to map a string column
            # name to an aesthetic (get() relies on fragile environment lookup)
            ggplot(aes(x=.data[[pairVars[1]]], y=.data[[pairVars[2]]])) + 
            geom_point(aes(color=src, size=n), alpha=0.25) + 
            labs(title="Distribution of Key Metrics by City", x=pairVars[1], y=pairVars[2]) + 
            scale_size_continuous("# Obs")
        print(p1)
    }
}

The cities are well differentiated by several combinations, particularly surface pressure vs. MSL pressure

A full random forest model is run for predicting city using LA, NYC, and Chicago:

# Create set of relevant training variables: every column that has a pct_
# counterpart, recovered by stripping the "pct_" prefix from those names
varsTrain <- allCity %>%
    select(starts_with("pct")) %>%
    names() %>%
    str_remove(pattern="pct_")
varsTrain
##  [1] "hour"                          "temperature_2m"               
##  [3] "relativehumidity_2m"           "dewpoint_2m"                  
##  [5] "apparent_temperature"          "pressure_msl"                 
##  [7] "surface_pressure"              "precipitation"                
##  [9] "rain"                          "snowfall"                     
## [11] "cloudcover"                    "cloudcover_low"               
## [13] "cloudcover_mid"                "cloudcover_high"              
## [15] "shortwave_radiation"           "direct_radiation"             
## [17] "direct_normal_irradiance"      "diffuse_radiation"            
## [19] "windspeed_10m"                 "windspeed_100m"               
## [21] "winddirection_10m"             "winddirection_100m"           
## [23] "windgusts_10m"                 "et0_fao_evapotranspiration"   
## [25] "weathercode"                   "vapor_pressure_deficit"       
## [27] "soil_temperature_0_to_7cm"     "soil_temperature_7_to_28cm"   
## [29] "soil_temperature_28_to_100cm"  "soil_temperature_100_to_255cm"
## [31] "soil_moisture_0_to_7cm"        "soil_moisture_7_to_28cm"      
## [33] "soil_moisture_28_to_100cm"     "soil_moisture_100_to_255cm"   
## [35] "year"                          "doy"
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
keyCities <- c("NYC", "LA", "Chicago")

# Pre-2022 training rows and 2022 holdout rows for the three key cities
dfTrainKey <- allCity %>% filter(tt=="train", year<2022, src %in% keyCities)
dfTestKey <- allCity %>% filter(tt=="test", year==2022, src %in% keyCities)

# Full random forest predicting city (fct_src) from the training variables
rfCity <- runFullRF(dfTrain=dfTrainKey, 
                     yVar="fct_src", 
                     xVars=varsTrain, 
                     dfTest=dfTestKey, 
                     useLabel=keyLabel, 
                     useSub=stringr::str_to_sentence(keyLabel), 
                     returnData=TRUE
                     )
## Warning: Dropped unused factor level(s) in dependent variable: Houston.

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 100%

Prediction accuracy is 100%, as expected given the significant differentiation. Houston is assessed for the city it is “most similar” to:

# Score the full 2022 test set (all four cities) with the three-city model,
# then plot the confusion matrix to see which class Houston lands in
predAllTest <- predictRF(rfCity$rf, df=allCity %>% filter(tt=="test", year==2022))
plotConfusion(predAllTest, trueCol="fct_src", useSub=NULL, plotCont=FALSE)

Based on predictors in the three-city random forest, Houston is most similar to NYC. The full random forest model is updated, including Houston:

# Refit the full random forest with Houston included as a fourth class
keyCities <- c("NYC", "LA", "Chicago", "Houston")

# Same train/holdout split as before, now spanning all four cities
dfTrain4 <- allCity %>% filter(tt=="train", year<2022, src %in% keyCities)
dfTest4 <- allCity %>% filter(tt=="test", year==2022, src %in% keyCities)

rfCity <- runFullRF(dfTrain=dfTrain4, 
                    yVar="fct_src", 
                    xVars=varsTrain, 
                    dfTest=dfTest4, 
                    useLabel=keyLabel, 
                    useSub=stringr::str_to_sentence(keyLabel), 
                    returnData=TRUE
                    )

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 100%

Even with the similarities between NYC and Houston, there is sufficient differentiation in the predictors to drive 100% accuracy

A model is created to predict temperature for two cities:

# Continuous-target random forest: predict 2m temperature for NYC/Chicago
keyCities <- c("NYC", "Chicago")
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# xVars drops predictors whose names start with "temp" or end with "ature"
# (temperature_2m, apparent_temperature). NOTE(review): the soil_temperature_*
# columns are NOT excluded by this regex -- confirm that is intended
rfTemp2m <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022, src %in% keyCities), 
                      yVar="temperature_2m", 
                      xVars=c(varsTrain[!str_detect(varsTrain, "^temp|ature$")]), 
                      dfTest=allCity %>% filter(tt=="test", year==2022, src %in% keyCities), 
                      useLabel=keyLabel, 
                      useSub=stringr::str_to_sentence(keyLabel), 
                      isContVar=TRUE,
                      rndTo=-1L,
                      refXY=TRUE,
                      returnData=TRUE
                      )
## Growing trees.. Progress: 65%. Estimated remaining time: 16 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.41% (RMSE 0.86 vs. 11.16 null)
## `geom_smooth()` using formula = 'y ~ x'

Temperature predictions on holdout data for NYC and Chicago have R-squared over 99%. The model is applied to data from Houston and LA:

# Apply the NYC/Chicago temperature model to LA holdout data
# (a city the model never saw during training)
# Temperature predictions for LA
predTempLA <- predictRF(rfTemp2m$rf, df=allCity %>% filter(tt=="test", year==2022, src=="LA"))
reportAccuracy(predTempLA, trueCol="temperature_2m", reportR2=TRUE, useLabel="LA temperature predictions")
## 
## R-squared of LA temperature predictions is: 92.38% (RMSE 1.89 vs. 6.86 null)
plotConfusion(predTempLA, trueCol="temperature_2m", plotCont=TRUE, rndTo=0.5, refXY=TRUE, useSub="LA")
## `geom_smooth()` using formula = 'y ~ x'

# Same out-of-city check for Houston
# Temperature predictions for Houston
predTempHOU <- predictRF(rfTemp2m$rf, df=allCity %>% filter(tt=="test", year==2022, src=="Houston"))
reportAccuracy(predTempHOU, trueCol="temperature_2m", reportR2=TRUE, useLabel="Houston temperature predictions")
## 
## R-squared of Houston temperature predictions is: 97.22% (RMSE 1.44 vs. 8.63 null)
plotConfusion(predTempHOU, trueCol="temperature_2m", plotCont=TRUE, rndTo=0.5, refXY=TRUE, useSub="Houston")
## `geom_smooth()` using formula = 'y ~ x'

Predictions for two cities not included in the original model have ~95% R-squared. Houston being relatively similar to NYC has higher R-squared than LA

Function runFullRF() is updated to allow for using an existing model with new data:

runFullRF <- function(dfTrain, 
                      yVar, 
                      xVars, 
                      useExistingRF=NULL,
                      dfTest=dfTrain,
                      useLabel="test data",
                      useSub=NULL, 
                      isContVar=FALSE,
                      rndTo=NULL,
                      rndBucketsAuto=100,
                      nSig=NULL,
                      refXY=FALSE,
                      makePlots=TRUE,
                      plotImp=makePlots,
                      plotConf=makePlots,
                      returnData=FALSE, 
                      ...
                      ) {
    
    # Train (or reuse) a random forest, score a test set, report accuracy, and
    # optionally plot variable importances and a confusion chart
    #
    # FUNCTION ARGUMENTS:
    # dfTrain: training data (only used when useExistingRF is NULL)
    # yVar: dependent variable
    # xVars: column(s) containing independent variables
    # useExistingRF: an existing RF model; when non-NULL, training and the
    #                importance plot (steps 1-2) are skipped
    # dfTest: test dataset for applying predictions (default: dfTrain)
    # useLabel: label to be used for reporting accuracy
    # useSub: subtitle to be used for confusion chart (NULL means none)
    # isContVar: boolean, is the variable continuous? (default FALSE means categorical)
    # rndTo: every number in x should be rounded to the nearest rndTo
    #        NULL means no rounding (default)
    #        -1L means make an estimate based on data
    # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired?
    # nSig: number of significant digits for automatically calculated rounding
    #       parameter (NULL means calculate exactly)
    # refXY: boolean, include a y=x reference line? (continuous targets only)
    # makePlots: boolean, master switch for both plot types below
    # plotImp: boolean, plot variable importance? (default is makePlots)
    # plotConf: boolean, plot confusion matrix? (default is makePlots)
    # returnData: boolean, should data be returned?
    # ...: additional parameters passed to runSimpleRF() and on to ranger::ranger()

    if(is.null(useExistingRF)) {
        # Steps 1-2: fit the forest (impurity importance), then build and
        # optionally plot the variable importances
        rfModel <- runSimpleRF(df=dfTrain, yVar=yVar, xVars=xVars, importance="impurity", ...)
        impData <- plotRFImportance(rfModel, plotData=plotImp, returnData=TRUE)
    } else {
        # Reuse the supplied model; no importance data is computed
        rfModel <- useExistingRF
        impData <- NA
    }

    # Step 3: score the test dataset
    predTest <- predictRF(rf=rfModel, df=dfTest)

    # Step 4: report accuracy (R-squared for continuous, hit rate for categorical)
    accResult <- reportAccuracy(predTest, 
                                trueCol=yVar, 
                                rndReport=3, 
                                useLabel=useLabel, 
                                reportR2=isTRUE(isContVar),
                                returnAcc=TRUE
                                )

    # Step 5: confusion chart, if requested
    if(isTRUE(plotConf)) {
        plotConfusion(predTest, 
                      trueCol=yVar, 
                      useSub=useSub, 
                      plotCont=isTRUE(isContVar), 
                      rndTo=rndTo, 
                      rndBucketsAuto=rndBucketsAuto,
                      nSig=nSig,
                      refXY=refXY
                      )
    }
    
    # Step 6: return the model, importances, predictions, and accuracy, if requested
    if(isTRUE(returnData)) return(list(rf=rfModel, rfImp=impData, tstPred=predTest, rfAcc=accResult))
    
}

Updated function runFullRF() is tested on LA and Houston:

# Temperature predictions for LA
# dfTrain/xVars are omitted: useExistingRF supplies the fitted model, so
# runFullRF() runs only the predict/report/plot steps (3-5)
runFullRF(yVar="temperature_2m", 
          useExistingRF=rfTemp2m$rf, 
          dfTest=allCity %>% filter(tt=="test", year==2022, src=="LA"), 
          useLabel="LA temperature predictions", 
          useSub="LA", 
          isContVar=TRUE,
          rndTo=0.5, 
          refXY=TRUE
          )
## 
## R-squared of LA temperature predictions is: 92.382% (RMSE 1.89 vs. 6.86 null)
## `geom_smooth()` using formula = 'y ~ x'

# Temperature predictions for Houston
# Same model-reuse pattern as the LA call: only steps 3-5 are executed
runFullRF(yVar="temperature_2m", 
          useExistingRF=rfTemp2m$rf, 
          dfTest=allCity %>% filter(tt=="test", year==2022, src=="Houston"), 
          useLabel="Houston temperature predictions", 
          useSub="Houston", 
          isContVar=TRUE,
          rndTo=0.5, 
          refXY=TRUE
          )
## 
## R-squared of Houston temperature predictions is: 97.223% (RMSE 1.44 vs. 8.63 null)
## `geom_smooth()` using formula = 'y ~ x'

A basic linear model can potentially drive better temperature predictions:

# Small linear model: temperature from relative humidity, dewpoint, and their
# interaction (t ~ rh + d + rh:d + 1 is equivalent to t ~ rh * d; the +1
# intercept is the default), fit on NYC/Chicago pre-2022 training rows
keyCities <- c("NYC", "Chicago")
lmMiniTemp <- allCity %>% 
    filter(tt=="train", year<2022, src %in% keyCities) %>%
    select(t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m) %>%
    lm(t~rh+d+rh:d+1, data=.) 
summary(lmMiniTemp)
## 
## Call:
## lm(formula = t ~ rh + d + rh:d + 1, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8377 -0.4461 -0.1708  0.2944 12.0201 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.158e+01  7.965e-03  2709.77   <2e-16 ***
## rh          -2.300e-01  1.150e-04 -1999.27   <2e-16 ***
## d            1.087e+00  6.448e-04  1685.07   <2e-16 ***
## rh:d        -5.407e-04  9.068e-06   -59.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6296 on 147464 degrees of freedom
## Multiple R-squared:  0.9966, Adjusted R-squared:  0.9966 
## F-statistic: 1.428e+07 on 3 and 147464 DF,  p-value: < 2.2e-16
# Score the NYC/Chicago holdout and summarize error by 5-degree bucket.
# Note the magrittr dot semantics: because `.` appears as a top-level
# argument (pred=.), the predict() vector is NOT inserted as mutate()'s
# first argument; the data frame from select() serves as the data instead
ggMiniTemp <- predict(lmMiniTemp, 
                      newdata=allCity %>% 
                          filter(tt=="test", year==2022, src %in% keyCities) %>% 
                          select(rh=relativehumidity_2m, d=dewpoint_2m)
                      ) %>% 
    mutate(select(allCity %>% filter(tt=="test", year==2022, src %in% keyCities), temperature_2m), 
           pred=., 
           err=pred-temperature_2m, 
           err2=err**2, 
           rnd5=round(temperature_2m/5)*5
           ) %>% 
    group_by(rnd5) %>% 
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTemp
## # A tibble: 13 × 6
##     rnd5     n temperature_2m    pred     err     err2
##    <dbl> <dbl>          <dbl>   <dbl>   <dbl>    <dbl>
##  1   -25     2        -23.3   -23.3   -0.0227  0.00534
##  2   -20    15        -19.4   -19.1    0.270   0.187  
##  3   -15    60        -14.5   -14.3    0.238   0.189  
##  4   -10   201         -9.76   -9.54   0.221   0.263  
##  5    -5   377         -4.52   -4.35   0.177   0.245  
##  6     0   648          0.202   0.184 -0.0177  0.267  
##  7     5   730          4.95    4.97   0.0224  0.248  
##  8    10   719         10.2    10.1   -0.0591  0.302  
##  9    15   692         14.9    14.9   -0.0380  0.433  
## 10    20   920         20.1    20.2    0.0703  0.244  
## 11    25   654         24.7    24.6   -0.0569  1.12   
## 12    30   254         29.4    28.4   -0.984   3.56   
## 13    35    38         34.2    31.2   -2.99   12.9
# Actual vs. predicted mean temperature per 5-degree bucket; the dashed
# y=x line marks perfect calibration
metricLabels <- c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")
ggMiniTemp %>% 
    select(rnd5, temperature_2m, pred) %>%
    pivot_longer(cols=-rnd5) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, color=metricLabels[name])) + 
    scale_color_discrete("Metric") + 
    labs(title="Actual vs. Predicted Temperature Using City Linear Model on Same City Holdout Data", 
         x="New city actual temperature (rounded to nearest 5)", 
         y="Average temperature for metric"
         ) + 
    geom_abline(slope=1, intercept=0, lty=2)

Predictions can then be explored in cities not included in the original linear model, starting with Houston:

# Score Houston (never seen by the linear model) and summarize error by
# 5-degree bucket; same top-level-dot mutate() pattern as the NYC/Chicago run
ggMiniTemp_hou <- predict(lmMiniTemp, 
                          newdata=allCity %>% 
                              filter(tt=="test", year==2022, src %in% c("Houston")) %>% 
                              select(rh=relativehumidity_2m, d=dewpoint_2m)
                          ) %>% 
    mutate(select(allCity %>% filter(tt=="test", year==2022, src %in% c("Houston")), temperature_2m), 
           pred=., 
           err=pred-temperature_2m, 
           err2=err**2, 
           rnd5=round(temperature_2m/5)*5
           ) %>% 
    group_by(rnd5) %>% 
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTemp_hou
## # A tibble: 11 × 6
##     rnd5     n temperature_2m   pred     err   err2
##    <dbl> <dbl>          <dbl>  <dbl>   <dbl>  <dbl>
##  1   -10     2         -7.95  -8.68  -0.730   0.600
##  2    -5    14         -4.25  -4.40  -0.150   0.428
##  3     0    38          0.429  0.608  0.179   0.222
##  4     5   197          5.21   5.29   0.0779  0.245
##  5    10   304          9.94   9.92  -0.0131  0.313
##  6    15   291         15.1   14.8   -0.292   0.700
##  7    20   507         20.3   20.0   -0.294   0.935
##  8    25   744         25.1   25.0   -0.0138  0.657
##  9    30   429         29.6   29.6    0.0214  1.12 
## 10    35   145         34.3   33.0   -1.30    2.92 
## 11    40     4         38.4   35.1   -3.34   11.3
# Overall RMSE for Houston: each bucket's mean squared error weighted by
# its observation count (weighted.mean(err2, w=n) == sum(n*err2)/sum(n))
ggMiniTemp_hou %>% 
    summarize(mse=weighted.mean(err2, w=n)) %>% 
    mutate(rmse=sqrt(mse))
## # A tibble: 1 × 2
##     mse  rmse
##   <dbl> <dbl>
## 1 0.850 0.922
# Actual vs. predicted means per bucket for Houston; dashed y=x reference
metricLabels <- c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")
ggMiniTemp_hou %>% 
    select(rnd5, temperature_2m, pred) %>%
    pivot_longer(cols=-rnd5) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, color=metricLabels[name])) + 
    scale_color_discrete("Metric") + 
    labs(title="Actual vs. Predicted Temperature Using City Linear Model on New City (Houston) Holdout Data", 
         x="New city (Houston) actual temperature (rounded to nearest 5)", 
         y="Average temperature for metric"
         ) + 
    geom_abline(slope=1, intercept=0, lty=2)

The linear model is generally very accurate for Houston, with the exception of under-predicting the very highest temperatures. RMSE of temperature predictions is lowered to ~1 from ~1.5 observed using the random forest

Predictions are also explored in Los Angeles:

# Score Los Angeles (also unseen in training) and summarize error by
# 5-degree bucket; same top-level-dot mutate() pattern as the earlier runs
ggMiniTemp_lax <- predict(lmMiniTemp, 
                          newdata=allCity %>% 
                              filter(tt=="test", year==2022, src %in% c("LA")) %>% 
                              select(rh=relativehumidity_2m, d=dewpoint_2m)
                          ) %>% 
    mutate(select(allCity %>% filter(tt=="test", year==2022, src %in% c("LA")), temperature_2m), 
           pred=., 
           err=pred-temperature_2m, 
           err2=err**2, 
           rnd5=round(temperature_2m/5)*5
           ) %>% 
    group_by(rnd5) %>% 
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTemp_lax
## # A tibble: 10 × 6
##     rnd5     n temperature_2m   pred     err    err2
##    <dbl> <dbl>          <dbl>  <dbl>   <dbl>   <dbl>
##  1     0     6           1.1   0.935  -0.165   0.205
##  2     5   127           5.72  5.52   -0.201   1.10 
##  3    10   605          10.2   9.20   -1.01    5.02 
##  4    15   754          15.1  13.9    -1.21    7.89 
##  5    20   585          19.7  17.5    -2.15   20.7  
##  6    25   331          24.7  22.1    -2.62   28.3  
##  7    30   176          29.7  24.2    -5.52   55.0  
##  8    35    49          34.4  25.9    -8.47   94.9  
##  9    40     7          38.9  28.6   -10.2   124.   
## 10    45     1          42.7  23.8   -18.9   356.
# Overall RMSE for LA, bucket errors weighted by observation count
# (weighted.mean(err2, w=n) == sum(n*err2)/sum(n))
ggMiniTemp_lax %>% 
    summarize(mse=weighted.mean(err2, w=n)) %>% 
    mutate(rmse=sqrt(mse))
## # A tibble: 1 × 2
##     mse  rmse
##   <dbl> <dbl>
## 1  17.5  4.18
# Actual vs. predicted means per bucket for LA; dashed y=x reference
metricLabels <- c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")
ggMiniTemp_lax %>% 
    select(rnd5, temperature_2m, pred) %>%
    pivot_longer(cols=-rnd5) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, color=metricLabels[name])) + 
    scale_color_discrete("Metric") + 
    labs(title="Actual vs. Predicted Temperature Using City Linear Model on New City (LA) Holdout Data", 
         x="New city (LA) actual temperature (rounded to nearest 5)", 
         y="Average temperature for metric"
         ) + 
    geom_abline(slope=1, intercept=0, lty=2)

The linear model is generally inaccurate for LA, consistently underestimating temperatures. RMSE of temperature predictions is raised to ~4 from ~2 observed using the random forest

Los Angeles is meaningfully different from NYC and Chicago on key predictors:

# Round the three metrics to integers and tally each unique combination by
# city, so the scatter plots can size points by observation count
tmpPlotData <- allCity %>% 
    select(src, relativehumidity_2m, dewpoint_2m, temperature_2m) %>% 
    mutate(across(where(is.numeric), .fns=round)) %>% 
    count(src, relativehumidity_2m, dewpoint_2m, temperature_2m)

# Temperature vs. dewpoint, with count-weighted per-city linear fits
tmpPlotData %>%
    count(src, temperature_2m, dewpoint_2m, wt=n) %>%
    ggplot(aes(x=temperature_2m, y=dewpoint_2m)) + 
    geom_point(aes(color=src, size=n), alpha=0.2) + 
    geom_smooth(aes(color=src, weight=n), method="lm") +
    labs(title="T/D by city")

# Temperature vs. relative humidity, same treatment
tmpPlotData %>%
    count(src, temperature_2m, relativehumidity_2m, wt=n) %>%
    ggplot(aes(x=temperature_2m, y=relativehumidity_2m)) + 
    geom_point(aes(color=src, size=n), alpha=0.1) + 
    geom_smooth(aes(color=src, weight=n), method="lm") +
    labs(title="T/RH by city")

Los Angeles is routinely hot and arid, while the other cities tend to be humid when they are hot. Data for an additional low-humidity city are downloaded, cached to avoid multiple hits to the server:

# Hourly data download for Las Vegas, NV
# Build the Open-Meteo archive URL for all hourly metrics over 2010-2023;
# NOTE(review): tz here is America/Los_Angeles -- presumably chosen as the
# nearest IANA zone for Las Vegas; confirm against the other city downloads
testURLHourly <- helperOpenMeteoURL(cityName="Las Vegas NV", 
                                    hourlyIndices=1:nrow(tblMetricsHourly),
                                    startDate="2010-01-01", 
                                    endDate="2023-12-31", 
                                    tz="America/Los_Angeles"
                                    )
## 
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
testURLHourly
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=36.21&longitude=-115.22&start_date=2010-01-01&end_date=2023-12-31&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm&timezone=America%2FLos_Angeles"
# Download file
if(!file.exists("testOM_hourly_las.json")) {
    fileDownload(fileName="testOM_hourly_las.json", url=testURLHourly)
} else {
    cat("\nFile testOM_hourly_las.json already exists, skipping download\n")
}
## 
## File testOM_hourly_las.json already exists, skipping download
# Daily data download for Las Vegas, NV
# Same date range and timezone as the hourly pull, over the daily metrics
# table. seq_len() replaces the 1:nrow() idiom (identical indices here).
testURLDaily <- helperOpenMeteoURL(cityName="Las Vegas NV", 
                                   dailyIndices=seq_len(nrow(tblMetricsDaily)),
                                   startDate="2010-01-01", 
                                   endDate="2023-12-31", 
                                   tz="America/Los_Angeles"
                                   )
## 
## Daily metrics created from indices: weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration
testURLDaily
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=36.21&longitude=-115.22&start_date=2010-01-01&end_date=2023-12-31&daily=weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration&timezone=America%2FLos_Angeles"
# Download file only if a cached copy is absent (avoids re-hitting the
# Open-Meteo server on each re-knit)
if(!file.exists("testOM_daily_las.json")) {
    fileDownload(fileName="testOM_daily_las.json", url=testURLDaily)
} else {
    cat("\nFile testOM_daily_las.json already exists, skipping download\n")
}
## 
## File testOM_daily_las.json already exists, skipping download

The daily and hourly datasets are loaded:

# Read daily JSON file
# formatOpenMeteoJSON parses the cached Open-Meteo JSON into the tblDaily /
# tblHourly / tblUnits / tblDescription components printed below
lasOMDaily <- formatOpenMeteoJSON("testOM_daily_las.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily 
## 
## $tblDaily
## # A tibble: 5,113 × 18
##    date       time       weathercode temperature_2m_max temperature_2m_min
##    <date>     <chr>            <int>              <dbl>              <dbl>
##  1 2010-01-01 2010-01-01           2               10.3               -1.3
##  2 2010-01-02 2010-01-02           0               14.2               -0.4
##  3 2010-01-03 2010-01-03           0               14.2                0.7
##  4 2010-01-04 2010-01-04           1               13.3                2.8
##  5 2010-01-05 2010-01-05           1               13.6                0.7
##  6 2010-01-06 2010-01-06           1               15.8                2.5
##  7 2010-01-07 2010-01-07           2               16.1                6  
##  8 2010-01-08 2010-01-08           1               11.2                1.2
##  9 2010-01-09 2010-01-09           1               13.2                0.5
## 10 2010-01-10 2010-01-10           2               15.6                5.9
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## #   apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## #   snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## #   windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## #   winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## #   et0_fao_evapotranspiration <dbl>
## 
## $tblHourly
## NULL
## 
## $tblUnits
## # A tibble: 17 × 4
##    metricType  name                       value      description                
##    <chr>       <chr>                      <chr>      <chr>                      
##  1 daily_units time                       "iso8601"  <NA>                       
##  2 daily_units weathercode                "wmo code" The most severe weather co…
##  3 daily_units temperature_2m_max         "deg C"    Maximum and minimum daily …
##  4 daily_units temperature_2m_min         "deg C"    Maximum and minimum daily …
##  5 daily_units apparent_temperature_max   "deg C"    Maximum and minimum daily …
##  6 daily_units apparent_temperature_min   "deg C"    Maximum and minimum daily …
##  7 daily_units precipitation_sum          "mm"       Sum of daily precipitation…
##  8 daily_units rain_sum                   "mm"       Sum of daily rain          
##  9 daily_units snowfall_sum               "cm"       Sum of daily snowfall      
## 10 daily_units precipitation_hours        "h"        The number of hours with r…
## 11 daily_units sunrise                    "iso8601"  Sun rise and set times     
## 12 daily_units sunset                     "iso8601"  Sun rise and set times     
## 13 daily_units windspeed_10m_max          "km/h"     Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max          "km/h"     Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg "     Dominant wind direction    
## 16 daily_units shortwave_radiation_sum    "MJ/m²"    The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm"       Daily sum of ET0 Reference…
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone           
##      <dbl>     <dbl>             <dbl>              <int> <chr>              
## 1     36.2     -115.              69.8             -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 36.23901
## longitude: -115.1625
## generationtime_ms: 69.77499
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 686
# Read hourly JSON file
# addVars=TRUE appends the derived columns shown in the glimpse output below
# (origTime, year, month, fct_hour, tod, doy, season, todSeason, and pct_*
# percentile columns) -- see formatOpenMeteoJSON for the exact derivations
lasTemp <- formatOpenMeteoJSON("testOM_hourly_las.json", addVars=TRUE)
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly 
## 
## $tblDaily
## NULL
## 
## $tblHourly
## # A tibble: 122,712 × 37
##    time                date        hour temperature_2m relativehumidity_2m
##    <dttm>              <date>     <int>          <dbl>               <int>
##  1 2010-01-01 00:00:00 2010-01-01     0            1.3                  53
##  2 2010-01-01 01:00:00 2010-01-01     1            0.5                  56
##  3 2010-01-01 02:00:00 2010-01-01     2            0.1                  56
##  4 2010-01-01 03:00:00 2010-01-01     3           -0.3                  57
##  5 2010-01-01 04:00:00 2010-01-01     4           -0.8                  59
##  6 2010-01-01 05:00:00 2010-01-01     5           -1.1                  60
##  7 2010-01-01 06:00:00 2010-01-01     6           -1.3                  60
##  8 2010-01-01 07:00:00 2010-01-01     7           -1.2                  58
##  9 2010-01-01 08:00:00 2010-01-01     8           -1.2                  56
## 10 2010-01-01 09:00:00 2010-01-01     9           -0.1                  56
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
## 
## $tblUnits
## # A tibble: 34 × 4
##    metricType   name                 value   description                        
##    <chr>        <chr>                <chr>   <chr>                              
##  1 hourly_units time                 iso8601 <NA>                               
##  2 hourly_units temperature_2m       deg C   Air temperature at 2 meters above …
##  3 hourly_units relativehumidity_2m  %       Relative humidity at 2 meters abov…
##  4 hourly_units dewpoint_2m          deg C   Dew point temperature at 2 meters …
##  5 hourly_units apparent_temperature deg C   Apparent temperature is the percei…
##  6 hourly_units pressure_msl         hPa     Atmospheric air pressure reduced t…
##  7 hourly_units surface_pressure     hPa     Atmospheric air pressure reduced t…
##  8 hourly_units precipitation        mm      Total precipitation (rain, showers…
##  9 hourly_units rain                 mm      Only liquid precipitation of the p…
## 10 hourly_units snowfall             cm      Snowfall amount of the preceding h…
## # ℹ 24 more rows
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone           
##      <dbl>     <dbl>             <dbl>              <int> <chr>              
## 1     36.2     -115.             7256.             -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 36.23901
## longitude: -115.1625
## generationtime_ms: 7256.367
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 686
## 
## Rows: 122,712
## Columns: 80
## $ time                              <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date                              <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour                              <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m                    <dbl> 1.3, 0.5, 0.1, -0.3, -0.8, -1.1, -1.…
## $ relativehumidity_2m               <int> 53, 56, 56, 57, 59, 60, 60, 58, 56, …
## $ dewpoint_2m                       <dbl> -7.2, -7.3, -7.6, -7.7, -7.8, -7.9, …
## $ apparent_temperature              <dbl> -2.5, -3.3, -3.6, -4.1, -4.3, -4.7, …
## $ pressure_msl                      <dbl> 1031.2, 1031.1, 1030.8, 1031.7, 1031…
## $ surface_pressure                  <dbl> 947.4, 947.1, 946.7, 947.4, 946.9, 9…
## $ precipitation                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover                        <int> 12, 12, 12, 12, 12, 9, 11, 6, 3, 19,…
## $ cloudcover_low                    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover_mid                    <int> 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 28,…
## $ cloudcover_high                   <int> 40, 40, 40, 39, 40, 29, 32, 19, 10, …
## $ shortwave_radiation               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 240, …
## $ direct_radiation                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 185, …
## $ direct_normal_irradiance          <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 55, 6…
## $ windspeed_10m                     <dbl> 5.0, 5.5, 4.7, 4.9, 3.1, 3.5, 3.4, 3…
## $ windspeed_100m                    <dbl> 5.7, 7.2, 6.9, 6.5, 6.3, 6.0, 6.9, 6…
## $ winddirection_10m                 <int> 291, 293, 293, 287, 291, 294, 302, 2…
## $ winddirection_100m                <int> 342, 342, 351, 354, 24, 17, 6, 6, 35…
## $ windgusts_10m                     <dbl> 9.7, 10.1, 10.1, 9.7, 9.0, 9.0, 9.0,…
## $ et0_fao_evapotranspiration        <dbl> 0.01, 0.01, 0.01, 0.01, 0.00, 0.00, …
## $ weathercode                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ vapor_pressure_deficit            <dbl> 0.31, 0.28, 0.27, 0.26, 0.24, 0.23, …
## $ soil_temperature_0_to_7cm         <dbl> 0.0, -0.3, -0.6, -0.8, -1.0, -1.1, -…
## $ soil_temperature_7_to_28cm        <dbl> 5.2, 5.1, 5.0, 4.9, 4.7, 4.6, 4.5, 4…
## $ soil_temperature_28_to_100cm      <dbl> 10.2, 10.2, 10.2, 10.2, 10.2, 10.2, …
## $ soil_temperature_100_to_255cm     <dbl> 21.3, 21.3, 21.3, 21.3, 21.3, 21.3, …
## $ soil_moisture_0_to_7cm            <dbl> 0.069, 0.069, 0.069, 0.069, 0.069, 0…
## $ soil_moisture_7_to_28cm           <dbl> 0.126, 0.126, 0.126, 0.126, 0.126, 0…
## $ soil_moisture_28_to_100cm         <dbl> 0.142, 0.142, 0.142, 0.142, 0.142, 0…
## $ soil_moisture_100_to_255cm        <dbl> 0.12, 0.12, 0.12, 0.12, 0.12, 0.12, …
## $ origTime                          <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year                              <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month                             <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour                          <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod                               <fct> Night, Night, Night, Night, Night, N…
## $ doy                               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season                            <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason                         <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour                          <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m                <dbl> 2, 1, 1, 1, 1, 1, 0, 0, 0, 1, 4, 10,…
## $ pct_relativehumidity_2m           <dbl> 87, 88, 88, 89, 90, 91, 91, 90, 88, …
## $ pct_dewpoint_2m                   <dbl> 23, 22, 21, 21, 20, 20, 19, 18, 17, …
## $ pct_apparent_temperature          <dbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 10,…
## $ pct_pressure_msl                  <dbl> 99, 99, 99, 99, 99, 99, 99, 99, 99, …
## $ pct_surface_pressure              <dbl> 98, 98, 98, 98, 98, 98, 98, 98, 98, …
## $ pct_precipitation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover                    <dbl> 65, 65, 65, 65, 65, 62, 64, 59, 54, …
## $ pct_cloudcover_low                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover_mid                <dbl> 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 70, 8…
## $ pct_cloudcover_high               <dbl> 75, 75, 75, 74, 75, 71, 72, 68, 64, …
## $ pct_shortwave_radiation           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 64, 7…
## $ pct_direct_radiation              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 66, 7…
## $ pct_direct_normal_irradiance      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 74, 8…
## $ pct_diffuse_radiation             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 59, 6…
## $ pct_windspeed_10m                 <dbl> 27, 33, 24, 27, 10, 14, 13, 11, 19, …
## $ pct_windspeed_100m                <dbl> 24, 33, 31, 29, 28, 26, 31, 31, 27, …
## $ pct_winddirection_10m             <dbl> 75, 76, 76, 74, 75, 76, 79, 77, 74, …
## $ pct_winddirection_100m            <dbl> 94, 94, 96, 97, 7, 5, 1, 1, 96, 93, …
## $ pct_windgusts_10m                 <dbl> 15, 17, 17, 15, 12, 12, 12, 12, 9, 1…
## $ pct_et0_fao_evapotranspiration    <dbl> 5, 5, 5, 5, 0, 0, 0, 0, 0, 10, 37, 5…
## $ pct_weathercode                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72,…
## $ pct_vapor_pressure_deficit        <dbl> 5, 4, 4, 4, 3, 3, 3, 3, 3, 4, 7, 19,…
## $ pct_soil_temperature_0_to_7cm     <dbl> 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 4, …
## $ pct_soil_temperature_7_to_28cm    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, …
## $ pct_soil_temperature_28_to_100cm  <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ pct_soil_temperature_100_to_255cm <dbl> 44, 44, 44, 44, 44, 44, 44, 44, 44, …
## $ pct_soil_moisture_0_to_7cm        <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ pct_soil_moisture_7_to_28cm       <dbl> 75, 75, 75, 75, 75, 75, 75, 75, 75, …
## $ pct_soil_moisture_28_to_100cm     <dbl> 64, 64, 64, 64, 64, 64, 64, 64, 64, …
## $ pct_soil_moisture_100_to_255cm    <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, …
## $ pct_year                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

## # A tibble: 8 × 4
##   todSeason    season tod       n
##   <fct>        <fct>  <fct> <int>
## 1 Spring-Day   Spring Day   15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day   Summer Day   15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day     Fall   Day   15288
## 6 Fall-Night   Fall   Night 15288
## 7 Winter-Day   Winter Day   15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
##     hour fct_hour tod       n
##    <int> <fct>    <fct> <int>
##  1     0 0        Night  5113
##  2     1 1        Night  5113
##  3     2 2        Night  5113
##  4     3 3        Night  5113
##  5     4 4        Night  5113
##  6     5 5        Night  5113
##  7     6 6        Night  5113
##  8     7 7        Day    5113
##  9     8 8        Day    5113
## 10     9 9        Day    5113
## 11    10 10       Day    5113
## 12    11 11       Day    5113
## 13    12 12       Day    5113
## 14    13 13       Day    5113
## 15    14 14       Day    5113
## 16    15 15       Day    5113
## 17    16 16       Day    5113
## 18    17 17       Day    5113
## 19    18 18       Day    5113
## 20    19 19       Night  5113
## 21    20 20       Night  5113
## 22    21 21       Night  5113
## 23    22 22       Night  5113
## 24    23 23       Night  5113
## # A tibble: 12 × 3
##    month season     n
##    <fct> <fct>  <int>
##  1 Jan   Winter 10416
##  2 Feb   Winter  9480
##  3 Mar   Spring 10416
##  4 Apr   Spring 10080
##  5 May   Spring 10416
##  6 Jun   Summer 10080
##  7 Jul   Summer 10416
##  8 Aug   Summer 10416
##  9 Sep   Fall   10080
## 10 Oct   Fall   10416
## 11 Nov   Fall   10080
## 12 Dec   Winter 10416

An integrated set of all-city test and train data is updated:

# Bind all the data frames, tagging each row with its source city via .id
allCity <- list("NYC"=nycTemp, 
                "LA"=laxTemp, 
                "Chicago"=chiTemp, 
                "Houston"=houTemp, 
                "Vegas"=lasTemp
                ) %>%
    bind_rows(.id="src")

# Create the index for training data (70/30 random split over all rows).
# seq_len() is the safe form of 1:nrow() and yields identical draws under
# this seed, since sample() sees the same index vector.
set.seed(24070113)
idxTrain_v2 <- sample(seq_len(nrow(allCity)), size = round(0.7*nrow(allCity)), replace=FALSE)

# Add test-train flag and a factor version of the city label to the dataset
allCity <- allCity %>%
    mutate(tt=ifelse(row_number() %in% idxTrain_v2, "train", "test"), 
           fct_src=factor(src))
allCity
## # A tibble: 608,784 × 83
##    src   time                date        hour temperature_2m relativehumidity_2m
##    <chr> <dttm>              <date>     <int>          <dbl>               <int>
##  1 NYC   2010-01-01 00:00:00 2010-01-01     0           -1.1                  95
##  2 NYC   2010-01-01 01:00:00 2010-01-01     1           -1                    96
##  3 NYC   2010-01-01 02:00:00 2010-01-01     2           -1                    96
##  4 NYC   2010-01-01 03:00:00 2010-01-01     3           -0.8                  97
##  5 NYC   2010-01-01 04:00:00 2010-01-01     4           -0.9                  97
##  6 NYC   2010-01-01 05:00:00 2010-01-01     5           -0.8                  97
##  7 NYC   2010-01-01 06:00:00 2010-01-01     6           -0.7                  97
##  8 NYC   2010-01-01 07:00:00 2010-01-01     7           -0.5                  97
##  9 NYC   2010-01-01 08:00:00 2010-01-01     8           -0.6                  97
## 10 NYC   2010-01-01 09:00:00 2010-01-01     9           -0.6                  97
## # ℹ 608,774 more rows
## # ℹ 77 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
# Review counts by year: one row per city x train/test split, one column
# per calendar year
allCity %>% 
    count(year, src, tt) %>% 
    pivot_wider(id_cols = c(src, tt), names_from = year, values_from = n)
## # A tibble: 10 × 16
##    src     tt    `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018`
##    <chr>   <chr>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
##  1 Chicago test    2569   2593   2572   2660   2623   2591   2583   2679   2692
##  2 Chicago train   6191   6167   6212   6100   6137   6169   6201   6081   6068
##  3 Houston test    2687   2539   2612   2665   2675   2607   2652   2686   2662
##  4 Houston train   6073   6221   6172   6095   6085   6153   6132   6074   6098
##  5 LA      test    2565   2607   2588   2674   2627   2641   2685   2650   2655
##  6 LA      train   6195   6153   6196   6086   6133   6119   6099   6110   6105
##  7 NYC     test    2633   2602   2622   2623   2672   2583   2603   2607   2670
##  8 NYC     train   6127   6158   6162   6137   6088   6177   6181   6153   6090
##  9 Vegas   test    2582   2528   2642   2619   2633   2587   2650   2679   2618
## 10 Vegas   train   6178   6232   6142   6141   6127   6173   6134   6081   6142
## # ℹ 5 more variables: `2019` <int>, `2020` <int>, `2021` <int>, `2022` <int>,
## #   `2023` <int>

Distributions of several key variables are explored:

# Key metrics used for the cross-city distribution comparisons below
keyVars <- c("temperature_2m",
             "relativehumidity_2m",
             "dewpoint_2m",
             "shortwave_radiation",
             "vapor_pressure_deficit",
             "soil_temperature_28_to_100cm",
             "soil_temperature_100_to_255cm",
             "soil_moisture_28_to_100cm",
             "soil_moisture_100_to_255cm")

# Boxplots of each key metric by city, one facet per metric with free y
# scales; colSelector() keeps src plus the keyVars columns before pivoting
allCity %>%
    colSelector(vecSelect=c("src", keyVars)) %>%
    pivot_longer(cols=-c(src)) %>%
    ggplot(aes(x=src, y=value)) + 
    geom_boxplot(aes(fill=src)) + 
    facet_wrap(~name, scales="free_y") + 
    labs(x=NULL, y=NULL, title="Distribution of Key Metrics by City") + 
    scale_fill_discrete(NULL)

Las Vegas stands out for especially low relative humidity (even relative to LA), as well as dry soil (similar to LA)

The scatter of temperature and dewpoint is also explored:

# Rounded temperature/dewpoint scatter (point size = number of hourly
# observations), with a count-weighted per-city linear fit
allCity %>% 
    select(t = temperature_2m, d = dewpoint_2m, src) %>% 
    mutate(across(where(is.numeric), round)) %>% 
    count(src, t, d) %>% 
    ggplot(aes(x = t, y = d)) + 
    geom_point(aes(size = n, color = src), alpha = 0.5) + 
    geom_smooth(aes(color = src, weight = n), method = "lm") +
    labs(x = "Temperature (C)", y = "Dewpoint (C)", title = "Temperature vs. Dewpoint", subtitle = "Hourly") + 
    scale_color_discrete(NULL) + 
    scale_size_continuous("# Obs")
## `geom_smooth()` using formula = 'y ~ x'

# Per-city Pearson correlation between hourly temperature and dewpoint;
# arrange() reproduces the alphabetical ordering that group_by() would give
allCity %>% 
    summarize(cor_td = cor(temperature_2m, dewpoint_2m), .by = src) %>% 
    arrange(src)
## # A tibble: 5 × 2
##   src     cor_td
##   <chr>    <dbl>
## 1 Chicago  0.950
## 2 Houston  0.834
## 3 LA       0.273
## 4 NYC      0.919
## 5 Vegas    0.371

Las Vegas is similar to LA, with lower dewpoints. The more humid cities have 80%+ correlation between temperature and dewpoint, dropping to roughly 30-40% correlation in the drier cities (27% for LA, 37% for Las Vegas)

Models for predicting city (one with soil temperature, one without) are saved using data without Las Vegas, for application to the new Las Vegas data:

# Run with all variables
# City classifier trained on the four previously downloaded cities (Vegas
# excluded) using pre-2022 training rows, scored on the 2022 holdout
rfCityFull <- runFullRF(allCity %>% 
                            mutate(fct_src=factor(src)) %>% 
                            filter(year<2022, tt=="train", src!="Vegas"), 
                        yVar="fct_src", 
                        xVars=varsTrain, 
                        dfTest=allCity %>% 
                            mutate(fct_src=factor(src)) %>% 
                            filter(year==2022, tt=="test", src!="Vegas"), 
                        isContVar=FALSE, 
                        returnData=TRUE
                        )
## Warning: Dropped unused factor level(s) in dependent variable: Vegas.
## Growing trees.. Progress: 97%. Estimated remaining time: 0 seconds.

## 
## Accuracy of test data is: 100%

# Score the unseen Vegas rows with the full model: every hour is labeled LA
predictRF(rfCityFull$rf, df=allCity %>% filter(src=="Vegas")) %>% count(pred)
## # A tibble: 1 × 2
##   pred       n
##   <fct>  <int>
## 1 LA    122712
# Run without moisture variables
# Same classifier setup as rfCityFull, but excluding the soil moisture
# predictors (any xVar whose name contains "moist")
rfCityNoMoisture <- runFullRF(allCity %>% 
                                  mutate(fct_src=factor(src)) %>% 
                                  filter(year<2022, tt=="train", src!="Vegas"), 
                              yVar="fct_src", 
                              xVars=varsTrain[!grepl(pattern="moist", x=varsTrain)],
                              dfTest=allCity %>% 
                                  mutate(fct_src=factor(src)) %>% 
                                  filter(year==2022, tt=="test", src!="Vegas"), 
                              isContVar=FALSE, 
                              returnData=TRUE
                              )
## Warning: Dropped unused factor level(s) in dependent variable: Vegas.
## Growing trees.. Progress: 65%. Estimated remaining time: 16 seconds.

## 
## Accuracy of test data is: 98.725%

# BUG FIX: this previously scored houTemp (Houston's own data), which
# trivially predicts "Houston" and says nothing about Vegas; the narrative
# below discusses how the no-moisture model classifies Las Vegas, so score
# the Vegas rows here, mirroring the rfCityFull check above
predictRF(rfCityNoMoisture$rf, df=allCity %>% filter(src=="Vegas")) %>% count(pred)
## # A tibble: 1 × 2
##   pred         n
##   <fct>    <int>
## 1 Houston 122712

The previously trained random forest models overwhelmingly predict Las Vegas as Los Angeles (if soil moisture is included) or Houston (if soil moisture is excluded)

The linear approximation for estimating temperature based on dewpoint and relative humidity is applied:

# Apply the old-city linear model (relative humidity + dewpoint) to the 2022
# Vegas holdout, then summarize prediction error within 5-degree bins of the
# actual temperature.
# NOTE(review): because "." appears as a top-level named argument (pred=.),
# magrittr suppresses first-argument insertion -- so mutate()'s data frame is
# the Vegas temperature column selected on the line below, and the lm
# predictions enter only via pred=. ; subtle but intentional.
ggMiniTempLAS <- predict(lmMiniTemp, 
                         newdata=allCity %>% 
                             filter(src=="Vegas", tt=="test", year==2022) %>%
                             select(rh=relativehumidity_2m, d=dewpoint_2m)
                         ) %>% 
    mutate(allCity %>% filter(src=="Vegas", tt=="test", year==2022) %>% select(temperature_2m), 
           pred=., 
           err=pred-temperature_2m, 
           err2=err**2, 
           # rnd5: actual temperature rounded to the nearest multiple of 5
           rnd5=round(temperature_2m/5)*5
    ) %>% 
    group_by(rnd5) %>% 
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTempLAS
## # A tibble: 11 × 6
##     rnd5     n temperature_2m   pred     err     err2
##    <dbl> <dbl>          <dbl>  <dbl>   <dbl>    <dbl>
##  1    -5     1          -2.7  -2.95   -0.252   0.0633
##  2     0    48           1.09  0.469  -0.616   2.42  
##  3     5   264           5.52  3.97   -1.55    6.82  
##  4    10   406           9.96  6.39   -3.56   23.1   
##  5    15   345          14.7   8.61   -6.11   54.3   
##  6    20   294          20.1  11.4    -8.65   98.2   
##  7    25   370          25.2  16.8    -8.41  110.    
##  8    30   407          29.8  20.4    -9.37  142.    
##  9    35   274          34.8  22.6   -12.3   202.    
## 10    40   119          39.7  23.1   -16.6   298.    
## 11    45     9          43.5  22.8   -20.7   430.
# Pooled MSE/RMSE across temperature bins: bin-level mean squared errors
# weighted by bin counts (equivalent to sum(n*err2)/sum(n))
ggMiniTempLAS %>% 
    summarize(mse = weighted.mean(err2, w = n)) %>% 
    mutate(rmse = sqrt(mse))
## # A tibble: 1 × 2
##     mse  rmse
##   <dbl> <dbl>
## 1  99.3  9.97
# Actual vs. predicted mean temperature per 5-degree bin; the dashed y=x
# reference line marks perfect calibration, and the named-vector lookup maps
# the pivoted column names onto legend labels
ggMiniTempLAS %>% 
    select(rnd5, temperature_2m, pred) %>%
    pivot_longer(cols=-c(rnd5)) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, 
                  color=c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")[name]
                  )
              ) + 
    labs(title="Actual vs. Predicted Temperature Using Old City Linear Model on New City Data", 
         x="New city actual temperature (rounded to nearest 5)", 
         y="Average temperature for metric"
         ) + 
    scale_color_discrete("Metric") + 
    geom_abline(slope=1, intercept=0, lty=2)

The linear approximation based on dewpoint and relative humidity is inaccurate for predicting temperatures in Las Vegas, consistent with Las Vegas having T/D trends very different from originally modeled cities, NYC and Chicago

Las Vegas is meaningfully different from NYC and Chicago on key predictors:

# Rounded-to-integer counts of humidity/dewpoint/temperature combinations by
# city (now including Vegas), weighted input for the two plots below
tmpPlotData <- allCity %>% 
    transmute(src, 
              relativehumidity_2m = round(relativehumidity_2m), 
              dewpoint_2m = round(dewpoint_2m), 
              temperature_2m = round(temperature_2m)
              ) %>% 
    count(src, relativehumidity_2m, dewpoint_2m, temperature_2m)

# Weighted T/D scatter by city, now including Vegas; wt=n re-aggregates the
# counts after dropping the relative-humidity dimension
tmpPlotData %>%
    count(src, temperature_2m, dewpoint_2m, wt=n) %>%
    ggplot(aes(x=temperature_2m, y=dewpoint_2m)) + 
    geom_point(aes(color=src, size=n), alpha=0.2) + 
    geom_smooth(aes(color=src, weight=n), method="lm") +
    labs(title="T/D by city")
## `geom_smooth()` using formula = 'y ~ x'

# Same weighted scatter and fit, now for temperature vs. relative humidity
tmpPlotData %>%
    count(src, temperature_2m, relativehumidity_2m, wt=n) %>%
    ggplot(aes(x=temperature_2m, y=relativehumidity_2m)) + 
    geom_point(aes(color=src, size=n), alpha=0.1) + 
    geom_smooth(aes(color=src, weight=n), method="lm") +
    labs(title="T/RH by city")
## `geom_smooth()` using formula = 'y ~ x'

The existing random forest model, trained on NYC and Chicago, is also tested on Las Vegas temperatures:

# Temperature predictions for Vegas
# Reuse the forest already trained on the other cities (useExistingRF), and
# only score it on the 2022 Vegas holdout rows -- no retraining happens here.
# NOTE(review): rndTo presumably rounds predictions to the nearest 0.5 and
# refXY adds the y=x reference to the accuracy plot -- confirm in runFullRF
runFullRF(yVar="temperature_2m", 
          useExistingRF=rfTemp2m$rf, 
          dfTest=allCity %>% filter(tt=="test", year==2022, src=="Vegas"), 
          useLabel="Las Vegas temperature predictions", 
          useSub="Las Vegas", 
          isContVar=TRUE,
          rndTo=0.5, 
          refXY=TRUE
          )
## 
## R-squared of Las Vegas temperature predictions is: 90.29% (RMSE 3.32 vs. 10.65 null)
## `geom_smooth()` using formula = 'y ~ x'

The random forest is more accurate than the linear model in predicting temperatures in Las Vegas based on training data from other cities. RMSE is ~3 rather than the ~10 from the linear model application

All combinations of two variables are explored for predicting temperature on a smaller training dataset:

# Train and test data
# Train and test data: old cities only (src comes from the list names used
# in bind_rows, so it is never NA); pre-2022 for training, 2022 holdout
dfTrainTemp <- allCity %>% 
    filter(src != "Vegas", tt == "train", year < 2022) %>% 
    mutate(fct_src = factor(src))
dfTestTemp <- allCity %>% 
    filter(src != "Vegas", tt == "test", year == 2022) %>% 
    mutate(fct_src = factor(src))

# Variables to explore
# NOTE(review): the regex drops names starting with "temp" or ending in
# "ature" (temperature_2m, apparent_temperature) but keeps the
# soil_temperature_* columns, which end in "cm" -- confirm that keeping soil
# temperatures as predictors of air temperature is intended
possTempVars <- c(varsTrain[!str_detect(varsTrain, "^temp|ature$")], "month", "tod")

# Subsets to use
set.seed(24070815)
# seq_len() avoids the 1:nrow() zero-row pitfall; sample() sees the same
# index vector, so the draws under this seed are unchanged
idxSmallTemp <- sample(seq_len(nrow(dfTrainTemp)), 5000, replace=FALSE)
# Accumulator for one (idx1, idx2, r2) row per variable pair; grown by
# rbind in the loop below (a few hundred iterations, so the append cost
# is negligible)
mtxSmallTemp <- matrix(nrow=0, ncol=3)

# Fit a small random forest for every unordered pair of candidate predictors
# on the 5,000-row subsample and record the 2022-holdout R-squared.
# seq_len() keeps the outer loop empty (instead of iterating over c(1, 0))
# if fewer than two candidates ever exist; the inner range is always valid
# because idx2 starts above idx1.
for(idx1 in seq_len(length(possTempVars) - 1L)) {
    for(idx2 in (idx1+1):length(possTempVars)) {
        # keyLabel is defined earlier in the document and reused for output
        r2SmallTemp <- runFullRF(dfTrain=dfTrainTemp[idxSmallTemp,], 
                                 yVar="temperature_2m", 
                                 xVars=possTempVars[c(idx1, idx2)], 
                                 dfTest=dfTestTemp, 
                                 useLabel=keyLabel, 
                                 useSub=stringr::str_to_sentence(keyLabel), 
                                 isContVar=TRUE,
                                 makePlots=FALSE,
                                 returnData=TRUE
                                 )[["rfAcc"]][["r2"]]
        mtxSmallTemp <- rbind(mtxSmallTemp, c(idx1, idx2, r2SmallTemp))
    }
}
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.429% (RMSE 9.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.568% (RMSE 5.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.904% (RMSE 8.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.761% (RMSE 9.32 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.887% (RMSE 10.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.689% (RMSE 10.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.11% (RMSE 9.92 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.088% (RMSE 9.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.741% (RMSE 9.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.102% (RMSE 9.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.91% (RMSE 10.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.741% (RMSE 8.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.008% (RMSE 9.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.981% (RMSE 9.65 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.124% (RMSE 9.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.458% (RMSE 10.01 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.166% (RMSE 10.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.926% (RMSE 10.03 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.027% (RMSE 10.03 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.149% (RMSE 10.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.99% (RMSE 7.99 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.581% (RMSE 9.67 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.066% (RMSE 7.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.913% (RMSE 2.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.307% (RMSE 3.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.703% (RMSE 5.33 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.437% (RMSE 8.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.273% (RMSE 9.74 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.693% (RMSE 9.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.371% (RMSE 9.96 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.175% (RMSE 10.13 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.815% (RMSE 10.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.027% (RMSE 6.98 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.472% (RMSE 6.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.346% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.67% (RMSE 0.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.088% (RMSE 9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.674% (RMSE 9.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.583% (RMSE 10.27 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.53% (RMSE 10.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.545% (RMSE 10.05 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.577% (RMSE 10.11 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.095% (RMSE 10.08 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.496% (RMSE 9.84 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.226% (RMSE 10.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.277% (RMSE 9.52 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.385% (RMSE 9.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.7% (RMSE 9.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.058% (RMSE 9.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.35% (RMSE 10.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.799% (RMSE 10.36 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.856% (RMSE 10.36 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.208% (RMSE 10.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.26% (RMSE 10.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.901% (RMSE 8.2 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.266% (RMSE 9.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.932% (RMSE 1.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.996% (RMSE 2.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.519% (RMSE 4.22 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.807% (RMSE 5.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.043% (RMSE 8.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.491% (RMSE 10.06 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.247% (RMSE 9.96 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.563% (RMSE 10.21 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.445% (RMSE 10.83 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.944% (RMSE 10.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.033% (RMSE 6.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.427% (RMSE 6.71 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.112% (RMSE 10.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.703% (RMSE 6.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.493% (RMSE 5.25 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.963% (RMSE 5.89 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.674% (RMSE 6.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.516% (RMSE 6.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.282% (RMSE 5.48 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.68% (RMSE 5.53 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.17% (RMSE 5.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.591% (RMSE 6.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.957% (RMSE 4.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.145% (RMSE 4.75 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.794% (RMSE 4.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.678% (RMSE 5.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.79% (RMSE 6.08 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.682% (RMSE 6.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.451% (RMSE 5.93 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.143% (RMSE 5.96 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.739% (RMSE 6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.916% (RMSE 3.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.176% (RMSE 5.49 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.885% (RMSE 0.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.287% (RMSE 2.26 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.311% (RMSE 4.12 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.826% (RMSE 4.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.575% (RMSE 5.74 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.127% (RMSE 5.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.572% (RMSE 5.45 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.311% (RMSE 6.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.894% (RMSE 5.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.081% (RMSE 6.23 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.822% (RMSE 5.71 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.887% (RMSE 5.51 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.518% (RMSE 6.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.956% (RMSE 4.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.745% (RMSE 8.96 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.842% (RMSE 9.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.151% (RMSE 8.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.112% (RMSE 8.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.85% (RMSE 8.96 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.306% (RMSE 8.68 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.438% (RMSE 9.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.883% (RMSE 8.33 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.09% (RMSE 8.31 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.366% (RMSE 8.49 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.024% (RMSE 8.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.767% (RMSE 9.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.717% (RMSE 9.08 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.207% (RMSE 9.29 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.618% (RMSE 9.33 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.859% (RMSE 9.25 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.139% (RMSE 7.63 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.623% (RMSE 8.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.748% (RMSE 6.84 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.979% (RMSE 2.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.88% (RMSE 4.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.025% (RMSE 5.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.042% (RMSE 7.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.374% (RMSE 9.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.767% (RMSE 9.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.334% (RMSE 9.11 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.753% (RMSE 9.26 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.253% (RMSE 9.05 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.238% (RMSE 7.26 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.056% (RMSE 6.97 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.286% (RMSE 8.93 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.817% (RMSE 9.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.3% (RMSE 9.74 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.245% (RMSE 9.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.25% (RMSE 9.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.46% (RMSE 9.45 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.289% (RMSE 9.29 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.55% (RMSE 9.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.526% (RMSE 8.92 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.502% (RMSE 8.92 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.704% (RMSE 9.08 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.106% (RMSE 9.12 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.484% (RMSE 9.84 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.298% (RMSE 9.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.56% (RMSE 9.84 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.027% (RMSE 9.87 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.633% (RMSE 9.89 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.008% (RMSE 8.06 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.242% (RMSE 9.17 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.477% (RMSE 6.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.082% (RMSE 2.53 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.986% (RMSE 4.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.676% (RMSE 5.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.736% (RMSE 8.27 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.046% (RMSE 9.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.806% (RMSE 9.26 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.218% (RMSE 9.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.571% (RMSE 9.84 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.86% (RMSE 9.65 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.211% (RMSE 6.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.768% (RMSE 6.68 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.445% (RMSE 9.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.088% (RMSE 10.24 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.043% (RMSE 10.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.587% (RMSE 10.05 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.672% (RMSE 10.21 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.859% (RMSE 9.87 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.948% (RMSE 10.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.057% (RMSE 9.47 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.875% (RMSE 9.48 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.887% (RMSE 9.71 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.131% (RMSE 9.75 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.397% (RMSE 10.28 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.15% (RMSE 10.13 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.72% (RMSE 10.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.77% (RMSE 10.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.215% (RMSE 10.41 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.147% (RMSE 8.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.585% (RMSE 10.05 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.717% (RMSE 7.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.485% (RMSE 2.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.222% (RMSE 4.74 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.135% (RMSE 6.05 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.954% (RMSE 8.7 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.324% (RMSE 9.96 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.715% (RMSE 9.99 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.122% (RMSE 10.24 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.423% (RMSE 10.11 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.41% (RMSE 10.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.009% (RMSE 7.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.983% (RMSE 7.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.19% (RMSE 10.23 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.913% (RMSE 10.2 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.521% (RMSE 10.06 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.228% (RMSE 10.18 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.842% (RMSE 9.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.242% (RMSE 10.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.786% (RMSE 9.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.899% (RMSE 9.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.622% (RMSE 9.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.202% (RMSE 9.8 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.261% (RMSE 10.23 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.787% (RMSE 10.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.425% (RMSE 10.06 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.516% (RMSE 10.06 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.949% (RMSE 10.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.915% (RMSE 8.83 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.8% (RMSE 10.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.522% (RMSE 7.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.046% (RMSE 4.41 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.623% (RMSE 5.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.81% (RMSE 6.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.356% (RMSE 8.8 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.174% (RMSE 9.97 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.885% (RMSE 10.04 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.068% (RMSE 10.29 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.175% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.351% (RMSE 10.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.321% (RMSE 7.83 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.513% (RMSE 7.95 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.181% (RMSE 10.23 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.207% (RMSE 10.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.276% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.164% (RMSE 9.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.696% (RMSE 10.15 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.841% (RMSE 9.43 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.143% (RMSE 9.47 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.715% (RMSE 9.66 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.786% (RMSE 9.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.242% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.538% (RMSE 9.95 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.678% (RMSE 9.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.324% (RMSE 9.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.554% (RMSE 10.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.586% (RMSE 8.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.349% (RMSE 10.12 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.067% (RMSE 8.05 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.189% (RMSE 4.63 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.093% (RMSE 5.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.81% (RMSE 6.68 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.071% (RMSE 8.76 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.24% (RMSE 9.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.185% (RMSE 9.91 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.289% (RMSE 10.12 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.112% (RMSE 9.97 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.54% (RMSE 10.22 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.685% (RMSE 7.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.754% (RMSE 8.01 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.191% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.731% (RMSE 10.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.638% (RMSE 9.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.92% (RMSE 10.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.027% (RMSE 9.47 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.557% (RMSE 9.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.768% (RMSE 9.71 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.424% (RMSE 9.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.623% (RMSE 10.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.59% (RMSE 10.21 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.906% (RMSE 10.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.942% (RMSE 10.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.691% (RMSE 10.36 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.532% (RMSE 8.67 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.065% (RMSE 9.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.031% (RMSE 7.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.521% (RMSE 2.65 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.598% (RMSE 4.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.12% (RMSE 5.96 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.263% (RMSE 8.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.47% (RMSE 9.84 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.16% (RMSE 9.75 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.485% (RMSE 9.95 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.039% (RMSE 10.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.357% (RMSE 10.17 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.638% (RMSE 7.53 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.487% (RMSE 7.32 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.102% (RMSE 9.97 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.189% (RMSE 9.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.59% (RMSE 10.21 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.264% (RMSE 9.52 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.97% (RMSE 9.53 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.181% (RMSE 9.75 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.948% (RMSE 9.65 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.675% (RMSE 10.26 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.074% (RMSE 10.13 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.891% (RMSE 10.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.19% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.818% (RMSE 10.36 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.005% (RMSE 8.7 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.852% (RMSE 9.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.358% (RMSE 7.47 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.347% (RMSE 2.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.012% (RMSE 4.65 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.953% (RMSE 6.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.794% (RMSE 8.65 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.705% (RMSE 9.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.2% (RMSE 9.75 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.392% (RMSE 9.95 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.316% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.699% (RMSE 10.21 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.662% (RMSE 7.52 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.94% (RMSE 7.43 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.523% (RMSE 10.06 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.773% (RMSE 9.99 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.936% (RMSE 9.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.686% (RMSE 9.2 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.661% (RMSE 9.44 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.668% (RMSE 9.38 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.026% (RMSE 9.97 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.733% (RMSE 9.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.329% (RMSE 9.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.428% (RMSE 9.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.042% (RMSE 10.03 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.997% (RMSE 8.45 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.921% (RMSE 9.65 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.934% (RMSE 7.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.474% (RMSE 3.04 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.676% (RMSE 4.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.358% (RMSE 5.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.489% (RMSE 8.48 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.825% (RMSE 9.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.482% (RMSE 9.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.046% (RMSE 9.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.669% (RMSE 9.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.469% (RMSE 9.95 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.054% (RMSE 7.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.255% (RMSE 7.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.485% (RMSE 9.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.672% (RMSE 9.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.964% (RMSE 9.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.018% (RMSE 9.81 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.516% (RMSE 9.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.288% (RMSE 10.39 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.913% (RMSE 10.25 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.019% (RMSE 10.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.434% (RMSE 10.17 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.001% (RMSE 10.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.883% (RMSE 8.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.572% (RMSE 10 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.916% (RMSE 7.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.409% (RMSE 3.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.112% (RMSE 4.98 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.798% (RMSE 6.17 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.508% (RMSE 8.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.756% (RMSE 10.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.603% (RMSE 10.11 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.627% (RMSE 10.32 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.687% (RMSE 10.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.273% (RMSE 10.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.735% (RMSE 7.73 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.03% (RMSE 7.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.086% (RMSE 10.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.677% (RMSE 9.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.621% (RMSE 9.32 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.785% (RMSE 9.66 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.877% (RMSE 9.48 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.718% (RMSE 9.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.868% (RMSE 9.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.49% (RMSE 9.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.479% (RMSE 9.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.869% (RMSE 7.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.992% (RMSE 9.19 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.596% (RMSE 7.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.366% (RMSE 2.68 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.954% (RMSE 3.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.429% (RMSE 5.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.403% (RMSE 7.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.699% (RMSE 9.32 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.297% (RMSE 9.29 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.315% (RMSE 9.51 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.759% (RMSE 9.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.278% (RMSE 9.63 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.03% (RMSE 6.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.971% (RMSE 6.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.862% (RMSE 9.43 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.844% (RMSE 9.2 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.169% (RMSE 9.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.689% (RMSE 9.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.163% (RMSE 9.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.585% (RMSE 9.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.932% (RMSE 9.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.104% (RMSE 9.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.787% (RMSE 7.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.386% (RMSE 9.22 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.94% (RMSE 7.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.855% (RMSE 2.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.061% (RMSE 3.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.203% (RMSE 5.18 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.167% (RMSE 7.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.851% (RMSE 9.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.851% (RMSE 9.31 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.322% (RMSE 9.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.277% (RMSE 9.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.266% (RMSE 9.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.637% (RMSE 6.93 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.908% (RMSE 6.83 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.073% (RMSE 9.47 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.719% (RMSE 9.66 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.731% (RMSE 9.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.305% (RMSE 9.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.25% (RMSE 9.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.522% (RMSE 9.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.422% (RMSE 9.84 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.267% (RMSE 8.17 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.695% (RMSE 9.44 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.747% (RMSE 7.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.935% (RMSE 2.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.475% (RMSE 3.68 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.738% (RMSE 5.33 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.936% (RMSE 8.13 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.704% (RMSE 9.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.667% (RMSE 9.44 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.314% (RMSE 9.68 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.882% (RMSE 9.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.244% (RMSE 9.8 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.438% (RMSE 7.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.275% (RMSE 6.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.786% (RMSE 9.71 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.431% (RMSE 9.73 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.527% (RMSE 9.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.667% (RMSE 9.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.917% (RMSE 9.76 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.841% (RMSE 9.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.882% (RMSE 8.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.762% (RMSE 9.32 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.646% (RMSE 7.67 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.171% (RMSE 2.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.649% (RMSE 3.8 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.192% (RMSE 5.39 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.825% (RMSE 7.93 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.773% (RMSE 9.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.767% (RMSE 9.49 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.47% (RMSE 9.73 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.618% (RMSE 9.67 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.914% (RMSE 9.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.493% (RMSE 7.17 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.696% (RMSE 7 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.165% (RMSE 9.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.291% (RMSE 10.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.954% (RMSE 10.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.332% (RMSE 10.33 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.1% (RMSE 10.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.903% (RMSE 8.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.123% (RMSE 9.97 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.542% (RMSE 7.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.665% (RMSE 2.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.68% (RMSE 4.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.732% (RMSE 6.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.843% (RMSE 9.08 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.725% (RMSE 10.21 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.505% (RMSE 10.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.44% (RMSE 10.38 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.633% (RMSE 10.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.062% (RMSE 10.35 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.063% (RMSE 7.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.964% (RMSE 7.21 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.182% (RMSE 10.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.131% (RMSE 10.24 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.786% (RMSE 10.26 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.983% (RMSE 10.24 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.804% (RMSE 8.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.402% (RMSE 9.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.643% (RMSE 7.74 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.705% (RMSE 2.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.142% (RMSE 4.63 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.855% (RMSE 6.08 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.35% (RMSE 8.99 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.672% (RMSE 10.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.579% (RMSE 10.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.524% (RMSE 10.27 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.096% (RMSE 10.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.885% (RMSE 10.2 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.1% (RMSE 7.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.588% (RMSE 7.24 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.393% (RMSE 9.95 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.371% (RMSE 10.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.714% (RMSE 10.49 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.888% (RMSE 8.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.623% (RMSE 9.78 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.561% (RMSE 7.81 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.832% (RMSE 2.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.837% (RMSE 4.43 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.57% (RMSE 5.83 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.745% (RMSE 8.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.956% (RMSE 10.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.741% (RMSE 10.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.074% (RMSE 10.4 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.185% (RMSE 10.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.327% (RMSE 10.17 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.74% (RMSE 7.3 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.287% (RMSE 7.11 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.992% (RMSE 9.92 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.754% (RMSE 10.44 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.618% (RMSE 8.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.681% (RMSE 9.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.891% (RMSE 7.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.861% (RMSE 2.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.013% (RMSE 4.41 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.121% (RMSE 5.87 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.831% (RMSE 8.71 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.142% (RMSE 10.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.857% (RMSE 10.04 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.315% (RMSE 10.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.64% (RMSE 10.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.713% (RMSE 10.15 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.573% (RMSE 7.39 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.879% (RMSE 7.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.715% (RMSE 9.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.923% (RMSE 8.52 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.109% (RMSE 10.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.701% (RMSE 7.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.65% (RMSE 2.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.967% (RMSE 4.42 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.596% (RMSE 5.92 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.377% (RMSE 8.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.408% (RMSE 10.38 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.302% (RMSE 10.23 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.066% (RMSE 10.4 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.751% (RMSE 10.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.995% (RMSE 10.45 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.51% (RMSE 7.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.323% (RMSE 7.26 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.276% (RMSE 10.12 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.362% (RMSE 8.43 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.634% (RMSE 7.01 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.115% (RMSE 2.52 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.72% (RMSE 3.33 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.969% (RMSE 4.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.809% (RMSE 7.14 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.763% (RMSE 8.53 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.305% (RMSE 8.43 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.562% (RMSE 8.67 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.478% (RMSE 8.73 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.885% (RMSE 8.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.747% (RMSE 6.09 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.572% (RMSE 6.01 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.948% (RMSE 8.71 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.827% (RMSE 7.29 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.636% (RMSE 2.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.757% (RMSE 4.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.744% (RMSE 5.91 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.76% (RMSE 8.53 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.877% (RMSE 9.6 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.606% (RMSE 9.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.554% (RMSE 9.67 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.801% (RMSE 9.88 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.244% (RMSE 10.07 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.584% (RMSE 7.31 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.405% (RMSE 7.32 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.579% (RMSE 9.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.146% (RMSE 2.29 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.291% (RMSE 3.4 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.955% (RMSE 4.66 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.074% (RMSE 6.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.112% (RMSE 7.49 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.738% (RMSE 7.3 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.973% (RMSE 7.57 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.898% (RMSE 7.79 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.974% (RMSE 7.92 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.973% (RMSE 5.51 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.739% (RMSE 5.43 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.766% (RMSE 7.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.139% (RMSE 2.52 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.978% (RMSE 2.55 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.809% (RMSE 2.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.028% (RMSE 2.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.726% (RMSE 2.61 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.291% (RMSE 2.69 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.555% (RMSE 2.64 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.044% (RMSE 3.11 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.807% (RMSE 2.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.102% (RMSE 2.73 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.584% (RMSE 4.58 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.924% (RMSE 4.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.24% (RMSE 4.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.578% (RMSE 4.7 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.431% (RMSE 4.72 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.301% (RMSE 4.73 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.716% (RMSE 4.8 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.109% (RMSE 4.75 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.81% (RMSE 4.67 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.973% (RMSE 4.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.944% (RMSE 5.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.843% (RMSE 5.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.566% (RMSE 6.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.709% (RMSE 6.18 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.436% (RMSE 6.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.479% (RMSE 6.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.558% (RMSE 6.1 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.774% (RMSE 5.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.489% (RMSE 5.36 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.888% (RMSE 6.5 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.533% (RMSE 8.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.923% (RMSE 8.83 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.66% (RMSE 9.15 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.347% (RMSE 9.51 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.933% (RMSE 8.83 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.27% (RMSE 5.77 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.425% (RMSE 5.56 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.27% (RMSE 8.62 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.58% (RMSE 9.03 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.775% (RMSE 9.37 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.102% (RMSE 8.76 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.178% (RMSE 9.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.943% (RMSE 7.13 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.017% (RMSE 6.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.833% (RMSE 9.82 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.51% (RMSE 9.89 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.762% (RMSE 8.53 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.111% (RMSE 9.86 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.187% (RMSE 7.34 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.482% (RMSE 7.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.489% (RMSE 9.9 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.092% (RMSE 8.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.979% (RMSE 10.66 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.538% (RMSE 8.02 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.519% (RMSE 7.46 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.702% (RMSE 10.15 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.641% (RMSE 10.59 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.967% (RMSE 5.98 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.189% (RMSE 6.05 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.567% (RMSE 9.89 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.239% (RMSE 7.63 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.407% (RMSE 7.54 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.068% (RMSE 10.24 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.415% (RMSE 7.25 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.973% (RMSE 7.85 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.357% (RMSE 7.9 vs. 10.4 null)

Predictive success by metric is explored:

# Convert the pairwise-model R-squared matrix into a labeled tibble:
# idx columns map back into possTempVars to recover the variable names
dfSmallR2Temp <- mtxSmallTemp %>%
    as.data.frame() %>%
    purrr::set_names(c("idx1", "idx2", "r2")) %>%
    tibble::as_tibble() %>%
    mutate(var1=possTempVars[idx1],
           var2=possTempVars[idx2],
           rn=row_number())

# Show the 20 strongest two-predictor combinations
dfSmallR2Temp %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n=20)
## # A tibble: 630 × 3
##    var1                       var2                             r2
##    <chr>                      <chr>                         <dbl>
##  1 dewpoint_2m                vapor_pressure_deficit        0.999
##  2 relativehumidity_2m        dewpoint_2m                   0.997
##  3 relativehumidity_2m        vapor_pressure_deficit        0.989
##  4 dewpoint_2m                soil_temperature_0_to_7cm     0.953
##  5 vapor_pressure_deficit     soil_temperature_0_to_7cm     0.951
##  6 soil_temperature_0_to_7cm  soil_temperature_7_to_28cm    0.941
##  7 et0_fao_evapotranspiration soil_temperature_0_to_7cm     0.941
##  8 surface_pressure           soil_temperature_0_to_7cm     0.941
##  9 soil_temperature_0_to_7cm  soil_moisture_0_to_7cm        0.940
## 10 relativehumidity_2m        soil_temperature_0_to_7cm     0.940
## 11 pressure_msl               soil_temperature_0_to_7cm     0.940
## 12 soil_temperature_0_to_7cm  soil_temperature_28_to_100cm  0.940
## 13 direct_normal_irradiance   soil_temperature_0_to_7cm     0.939
## 14 hour                       soil_temperature_0_to_7cm     0.939
## 15 winddirection_100m         soil_temperature_0_to_7cm     0.939
## 16 direct_radiation           soil_temperature_0_to_7cm     0.939
## 17 winddirection_10m          soil_temperature_0_to_7cm     0.938
## 18 soil_temperature_0_to_7cm  soil_temperature_100_to_255cm 0.938
## 19 soil_temperature_0_to_7cm  doy                           0.938
## 20 soil_temperature_0_to_7cm  soil_moisture_7_to_28cm       0.937
## # ℹ 610 more rows
# For each variable, summarize the min/mean/max holdout R-squared across
# every pairing it appears in, then plot as a dot with min-max whiskers
dfSmallR2Temp %>%
    pivot_longer(cols=c(var1, var2)) %>%
    group_by(value) %>%
    summarize(r2_min=min(r2), r2_mu=mean(r2), r2_max=max(r2)) %>%
    ggplot(aes(x=fct_reorder(value, r2_mu), y=r2_mu, ymin=r2_min, ymax=r2_max)) +
    coord_flip() +
    geom_point() +
    geom_errorbar() +
    lims(y=c(NA, 1)) +
    geom_hline(yintercept=1, lty=2, color="red") +
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting temperature", 
         y="Range of R-squared (min-mean-max)", 
         x=NULL
    )

# Re-rank the pairings after dropping the dominant shallow soil-temperature
# variable from both slots
dfSmallR2Temp %>%
    filter(var1!="soil_temperature_0_to_7cm",
           var2!="soil_temperature_0_to_7cm") %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n=20)
## # A tibble: 595 × 3
##    var1                       var2                            r2
##    <chr>                      <chr>                        <dbl>
##  1 dewpoint_2m                vapor_pressure_deficit       0.999
##  2 relativehumidity_2m        dewpoint_2m                  0.997
##  3 relativehumidity_2m        vapor_pressure_deficit       0.989
##  4 et0_fao_evapotranspiration soil_temperature_7_to_28cm   0.897
##  5 vapor_pressure_deficit     soil_temperature_7_to_28cm   0.893
##  6 dewpoint_2m                et0_fao_evapotranspiration   0.889
##  7 hour                       soil_temperature_7_to_28cm   0.883
##  8 direct_radiation           soil_temperature_7_to_28cm   0.881
##  9 shortwave_radiation        soil_temperature_7_to_28cm   0.880
## 10 direct_normal_irradiance   soil_temperature_7_to_28cm   0.875
## 11 diffuse_radiation          soil_temperature_7_to_28cm   0.866
## 12 dewpoint_2m                soil_temperature_7_to_28cm   0.843
## 13 relativehumidity_2m        soil_temperature_7_to_28cm   0.835
## 14 winddirection_100m         soil_temperature_7_to_28cm   0.820
## 15 windgusts_10m              soil_temperature_7_to_28cm   0.820
## 16 winddirection_10m          soil_temperature_7_to_28cm   0.818
## 17 surface_pressure           soil_temperature_7_to_28cm   0.810
## 18 soil_temperature_7_to_28cm month                        0.810
## 19 soil_temperature_7_to_28cm soil_temperature_28_to_100cm 0.809
## 20 pressure_msl               soil_temperature_7_to_28cm   0.809
## # ℹ 575 more rows

Select combinations are explored using the full training dataset:

# Candidate predictors selected from the small-sample screen above
possLargeVars <- c("dewpoint_2m", "vapor_pressure_deficit",
                   "relativehumidity_2m", "soil_temperature_0_to_7cm")
print(possLargeVars)
## [1] "dewpoint_2m"               "vapor_pressure_deficit"   
## [3] "relativehumidity_2m"       "soil_temperature_0_to_7cm"
# Fit a random forest on every unordered pair of candidate predictors using
# the full training dataset, recording (idx1, idx2, holdout R-squared).
# The results matrix is preallocated up front; the original grew it with
# rbind() inside the loop (quadratic copying) and passed a needless
# dfTrainTemp[,] copy. combn() enumerates pairs in the same lexicographic
# order as the original nested loops.
pairIdxLarge <- t(utils::combn(length(possLargeVars), 2L))
mtxLarge <- matrix(NA_real_, nrow=nrow(pairIdxLarge), ncol=3)

for(iPair in seq_len(nrow(pairIdxLarge))) {
    idx1 <- pairIdxLarge[iPair, 1]
    idx2 <- pairIdxLarge[iPair, 2]
    # Random forest on the two selected predictors; keep only the holdout R-squared
    r2LargeTemp <- runFullRF(dfTrain=dfTrainTemp, 
                             yVar="temperature_2m", 
                             xVars=possLargeVars[c(idx1, idx2)], 
                             dfTest=dfTestTemp, 
                             useLabel=keyLabel, 
                             useSub=stringr::str_to_sentence(keyLabel), 
                             isContVar=TRUE,
                             makePlots=FALSE,
                             returnData=TRUE
                             )[["rfAcc"]][["r2"]]
    mtxLarge[iPair, ] <- c(idx1, idx2, r2LargeTemp)
}
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.964% (RMSE 0.2 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.902% (RMSE 0.33 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.675% (RMSE 2.16 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.177% (RMSE 0.94 vs. 10.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.59% (RMSE 2.18 vs. 10.4 null)
## Growing trees.. Progress: 91%. Estimated remaining time: 2 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.588% (RMSE 2.42 vs. 10.4 null)
# Label the full-training pairwise results and rank by holdout R-squared
dfLargeR2Temp <- mtxLarge %>%
    as.data.frame() %>%
    purrr::set_names(c("idx1", "idx2", "r2")) %>%
    tibble::as_tibble() %>%
    mutate(var1=possLargeVars[idx1],
           var2=possLargeVars[idx2],
           rn=row_number())
dfLargeR2Temp %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n=20)
## # A tibble: 6 × 3
##   var1                   var2                         r2
##   <chr>                  <chr>                     <dbl>
## 1 dewpoint_2m            vapor_pressure_deficit    1.00 
## 2 dewpoint_2m            relativehumidity_2m       0.999
## 3 vapor_pressure_deficit relativehumidity_2m       0.992
## 4 dewpoint_2m            soil_temperature_0_to_7cm 0.957
## 5 vapor_pressure_deficit soil_temperature_0_to_7cm 0.956
## 6 relativehumidity_2m    soil_temperature_0_to_7cm 0.946

A model using only dewpoint and vapor pressure deficit is trained on one city, then applied to the others:

# Training data: NYC only, pre-2022 training split
# (fct_src is created after filtering, so factor levels reflect the kept cities)
dfTrainTemp_v2 <- allCity %>%
    filter(src %in% c("NYC")) %>%
    filter(tt=="train") %>%
    filter(year<2022) %>%
    mutate(fct_src=factor(src))

# Test data: every city, 2022 holdout split
dfTestTemp_v2 <- allCity %>%
    filter(tt=="test") %>%
    filter(year==2022) %>%
    mutate(fct_src=factor(src))

# Random forest for temperature using dewpoint and vapor pressure deficit,
# trained on NYC only and scored per city on the 2022 holdout predictions
keyLabel <- "predictions based on NYC pre-2022 training data applied to each city in 2022 holdout dataset"
tmpPred_v2 <- runFullRF(dfTrain=dfTrainTemp_v2,
                        yVar="temperature_2m",
                        xVars=c("dewpoint_2m", "vapor_pressure_deficit"),
                        dfTest=dfTestTemp_v2,
                        useLabel=keyLabel,
                        useSub=stringr::str_to_sentence(keyLabel),
                        isContVar=TRUE,
                        makePlots=FALSE,
                        returnData=TRUE
                        )[["tstPred"]] %>%
    select(src, temperature_2m, pred) %>%
    group_by(src) %>%
    summarize(n=n(),
              # Total and residual sums of squares per city
              tss=sum((temperature_2m-mean(temperature_2m))^2),
              rss=sum((temperature_2m-pred)^2),
              r2=1-rss/tss,
              rmse=sqrt(rss/n),
              # Baseline (null-model) RMSE for comparison
              berr=sqrt(tss/n)
              )
## 
## R-squared of predictions based on NYC pre-2022 training data applied to each city in 2022 holdout dataset is: 94.65% (RMSE 2.47 vs. 10.69 null)
# Per-city holdout accuracy for the NYC-trained model
tmpPred_v2
## # A tibble: 5 × 7
##   src         n     tss    rss    r2  rmse  berr
##   <chr>   <int>   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Chicago  2592 356174.   305. 0.999 0.343 11.7 
## 2 Houston  2659 194789.   400. 0.998 0.388  8.56
## 3 LA       2677 127962.  6718. 0.947 1.58   6.91
## 4 NYC      2664 280171.   104. 1.00  0.197 10.3 
## 5 Vegas    2537 287670. 72697. 0.747 5.35  10.6

The model trained on NYC performs well on Chicago, Houston, and LA, but misses significantly on Las Vegas.

Patterns in dewpoint and vapor pressure deficit are explored:

# Round dewpoint and VPD to the nearest 0.5 and count test observations
# falling into each (city, VPD, dewpoint) grid cell
dfPlot_v2 <- dfTestTemp_v2 %>%
    select(src, vapor_pressure_deficit, dewpoint_2m) %>%
    mutate(across(where(is.numeric), .fns=function(v) round(2*v)/2)) %>%
    count(src, vapor_pressure_deficit, dewpoint_2m)

# Scatter the binned cells, sized by observation count, one panel per city
dfPlot_v2 %>%
    ggplot(aes(y=vapor_pressure_deficit, x=dewpoint_2m)) +
    geom_point(aes(color=src, size=n), alpha=0.25) +
    facet_wrap(~src) +
    scale_color_discrete(NULL)

# Grid cells of the rounded dewpoint x VPD space that the NYC training
# data covers with at least 10 observations
tmpNYC <- dfTrainTemp_v2 %>%
    select(src, vapor_pressure_deficit, dewpoint_2m) %>%
    mutate(across(where(is.numeric), .fns=function(v) round(2*v)/2)) %>%
    count(src, vapor_pressure_deficit, dewpoint_2m) %>%
    filter(src=="NYC", n>=10) %>%
    mutate(inNYC=TRUE)

# Flag each test-set grid cell by whether NYC training covers it
# (cells with no match come back NA from the join and become FALSE)
dfPlot_v2 %>%
    left_join(select(tmpNYC, vapor_pressure_deficit, dewpoint_2m, inNYC),
              by=c("vapor_pressure_deficit", "dewpoint_2m")
              ) %>%
    mutate(inNYC=coalesce(inNYC, FALSE)) %>%
    ggplot(aes(y=vapor_pressure_deficit, x=dewpoint_2m)) +
    geom_point(aes(color=inNYC, size=n), alpha=0.25) +
    facet_wrap(~src) +
    scale_color_discrete("NYC training\nhas 10+ obs")

# Weighted share of each city's test observations that fall in a
# well-covered NYC training cell (meanNYC), plus raw counts
dfPlot_v2 %>%
    left_join(select(tmpNYC, vapor_pressure_deficit, dewpoint_2m, inNYC),
              by=c("vapor_pressure_deficit", "dewpoint_2m")
              ) %>%
    mutate(inNYC=coalesce(inNYC, FALSE)) %>%
    group_by(src) %>%
    summarize(meanNYC=sum(n*inNYC)/sum(n), n=sum(n), nObs=n())
## # A tibble: 5 × 4
##   src     meanNYC     n  nObs
##   <chr>     <dbl> <int> <int>
## 1 Chicago   0.988  2592   335
## 2 Houston   0.936  2659   371
## 3 LA        0.802  2677   490
## 4 NYC       0.990  2664   361
## 5 Vegas     0.355  2537   747

Chicago and NYC are both very well represented by the training data, while a majority of Las Vegas observations are largely or entirely absent from it.

There are strong relationships among dewpoint, vapor pressure deficit, relative humidity, and temperature:

# Temperature vs. VPD at four fixed (rounded) dewpoint levels
dfTestTemp_v2 %>%
    mutate(across(c(dewpoint_2m, temperature_2m, relativehumidity_2m), .fns=round)) %>%
    select(src, vapor_pressure_deficit, dewpoint_2m, temperature_2m, relativehumidity_2m) %>%
    filter(dewpoint_2m %in% c(-10, 0, 10, 20)) %>%
    ggplot(aes(x=vapor_pressure_deficit, y=temperature_2m)) +
    geom_point(aes(color=factor(dewpoint_2m))) +
    scale_color_discrete("Dewpoint")

# Temperature vs. relative humidity at the same four dewpoint levels
dfTestTemp_v2 %>%
    mutate(across(c(dewpoint_2m, temperature_2m, relativehumidity_2m), .fns=round)) %>%
    select(src, vapor_pressure_deficit, dewpoint_2m, temperature_2m, relativehumidity_2m) %>%
    filter(dewpoint_2m %in% c(-10, 0, 10, 20)) %>%
    ggplot(aes(x=relativehumidity_2m, y=temperature_2m)) +
    geom_point(aes(color=factor(dewpoint_2m))) +
    scale_color_discrete("Dewpoint")

To better cover the predictor space, a model using only dewpoint and vapor pressure deficit is run on NYC and Vegas, then applied to the others:

# Training data: NYC plus Vegas, pre-2022 training split
# (fct_src is created after filtering, so factor levels reflect the kept cities)
dfTrainTemp_v3 <- allCity %>%
    filter(src %in% c("NYC", "Vegas")) %>%
    filter(tt=="train") %>%
    filter(year<2022) %>%
    mutate(fct_src=factor(src))

# Test data: every city, 2022 holdout split
dfTestTemp_v3 <- allCity %>%
    filter(tt=="test") %>%
    filter(year==2022) %>%
    mutate(fct_src=factor(src))

# Random forest for temperature using dewpoint and vapor pressure deficit
# Random forest for temperature using dewpoint and vapor pressure deficit
# runFullRF (defined in _v001) fits ranger on dfTrain and scores dfTest;
# only the test predictions ("tstPred") are kept here
keyLabel <- "predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset"
tmpPred_v3 <- runFullRF(dfTrain=dfTrainTemp_v3, 
                        yVar="temperature_2m", 
                        xVars=c("dewpoint_2m", "vapor_pressure_deficit"), 
                        dfTest=dfTestTemp_v3, 
                        useLabel=keyLabel, 
                        useSub=stringr::str_to_sentence(keyLabel), 
                        isContVar=TRUE,
                        makePlots=FALSE,
                        returnData=TRUE
                        )[["tstPred"]] %>%
    select(src, temperature_2m, pred) %>%
    group_by(src) %>%
    # Per-city fit: tss/rss for R^2, rmse, and berr = null-model RMSE
    summarize(n=n(), 
              tss=sum((temperature_2m-mean(temperature_2m))**2), 
              rss=sum((temperature_2m-pred)**2), 
              r2=1-rss/tss, 
              rmse=sqrt(rss/n),
              berr=sqrt(tss/n)
              )
## 
## R-squared of predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset is: 99.959% (RMSE 0.22 vs. 10.69 null)
tmpPred_v3
## # A tibble: 5 × 7
##   src         n     tss   rss    r2  rmse  berr
##   <chr>   <int>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chicago  2592 356174. 213.  0.999 0.287 11.7 
## 2 Houston  2659 194789. 258.  0.999 0.312  8.56
## 3 LA       2677 127962.  34.5 1.00  0.113  6.91
## 4 NYC      2664 280171.  66.4 1.00  0.158 10.3 
## 5 Vegas    2537 287670.  43.8 1.00  0.131 10.6

The model trained on NYC and Vegas generally performs very well on all cities

Coverage of the temperature and humidity space by city is explored:

# Coverage of the (dewpoint, temperature) space: density contours from the
# NYC/Vegas subset overlaid on per-city counts of rounded combinations.
# Fix: plot title previously misspelled "dewpoint" as "depoint".
dfTestTemp_v2 %>% 
    select(src, vapor_pressure_deficit, dewpoint_2m, temperature_2m, relativehumidity_2m) %>% 
    mutate(across(c(dewpoint_2m, temperature_2m, relativehumidity_2m), .fns=function(x) round(x))) %>% 
    ggplot(aes(x=dewpoint_2m, y=temperature_2m)) + 
    geom_density2d(data=~filter(., src %in% c("NYC", "Vegas"))) +
    geom_point(data=~count(., src, temperature_2m, dewpoint_2m), 
               aes(color=src, size=n), 
               alpha=0.25
               ) + 
    scale_color_discrete(NULL) + 
    labs(title="Relationships between temperature and dewpoint", 
         subtitle="Contours from geom_density_2d() use only NYC and Las Vegas data"
         )

Modeling using NYC and Las Vegas data may not fully cover the coldest and driest portions of the Chicago space

The model using only NYC and Las Vegas is applied to Chicago, with accuracy explored by temperature:

# Train and test data
# NOTE(review): identical to the split built above; re-created here so this
# rendered section stands alone when re-run
dfTrainTemp_v3 <- allCity %>% 
    filter(src %in% c("NYC", "Vegas"), tt=="train", year<2022) %>% 
    mutate(fct_src=factor(src))
dfTestTemp_v3 <- allCity %>% 
    filter(tt=="test", year==2022) %>% 
    mutate(fct_src=factor(src))

# Random forest for temperature using dewpoint and vapor pressure deficit
# Random forest for temperature using dewpoint and vapor pressure deficit
# Same fit as above, but the full prediction frame is kept (tmpPred_v3_df)
# so errors can be sliced by city and temperature bin below
keyLabel <- "predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset"
tmpPred_v3_df <- runFullRF(dfTrain=dfTrainTemp_v3, 
                           yVar="temperature_2m", 
                           xVars=c("dewpoint_2m", "vapor_pressure_deficit"), 
                           dfTest=dfTestTemp_v3, 
                           useLabel=keyLabel, 
                           useSub=stringr::str_to_sentence(keyLabel), 
                           isContVar=TRUE,
                           makePlots=FALSE,
                           returnData=TRUE
                           )[["tstPred"]] 
## 
## R-squared of predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset is: 99.959% (RMSE 0.22 vs. 10.69 null)
# Per-city accuracy of the NYC/Vegas model (berr = null-model RMSE)
tmpPred_v3_df %>%
    select(src, temperature_2m, pred) %>%
    group_by(src) %>%
    summarize(n=n(), 
              tss=sum((temperature_2m-mean(temperature_2m))**2), 
              rss=sum((temperature_2m-pred)**2), 
              r2=1-rss/tss, 
              rmse=sqrt(rss/n),
              berr=sqrt(tss/n)
              )
## # A tibble: 5 × 7
##   src         n     tss   rss    r2  rmse  berr
##   <chr>   <int>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chicago  2592 356174. 242.  0.999 0.305 11.7 
## 2 Houston  2659 194789. 228.  0.999 0.293  8.56
## 3 LA       2677 127962.  34.6 1.00  0.114  6.91
## 4 NYC      2664 280171.  64.1 1.00  0.155 10.3 
## 5 Vegas    2537 287670.  42.3 1.00  0.129 10.6
# Chicago errors bucketed by actual temperature rounded to the nearest 5C;
# pcterr2 = each bin's share of total squared error
ggMiniTempCHI <- tmpPred_v3_df %>% 
    select(src, temperature_2m, pred) %>%
    filter(src=="Chicago") %>%
    mutate(err=pred-temperature_2m, 
           err2=err**2, 
           rnd5=round(temperature_2m/5)*5
    ) %>% 
    group_by(rnd5) %>% 
    # n() first, then bin means of every numeric column (incl. err, err2)
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean)) %>%
    mutate(pcterr2=n*err2/sum(n*err2))
ggMiniTempCHI
## # A tibble: 13 × 7
##     rnd5     n temperature_2m    pred      err     err2 pcterr2
##    <dbl> <dbl>          <dbl>   <dbl>    <dbl>    <dbl>   <dbl>
##  1   -25     2        -24.0   -16.4    7.70    59.4     0.491  
##  2   -20    19        -19.0   -17.6    1.34     2.54    0.200  
##  3   -15    43        -14.3   -14.0    0.333    0.164   0.0291 
##  4   -10   129         -9.94   -9.72   0.223    0.0970  0.0518 
##  5    -5   247         -4.80   -4.61   0.188    0.0720  0.0736 
##  6     0   321          0.192   0.269  0.0766   0.0239  0.0317 
##  7     5   356          4.69    4.69   0.00503  0.0119  0.0175 
##  8    10   284          9.82    9.79  -0.0294   0.0253  0.0298 
##  9    15   308         14.9    14.9   -0.00369  0.00893 0.0114 
## 10    20   480         20.2    20.1   -0.0391   0.0106  0.0210 
## 11    25   303         24.6    24.6   -0.0730   0.0236  0.0295 
## 12    30    90         29.1    29.0   -0.0939   0.0260  0.00969
## 13    35    10         34.7    34.4   -0.270    0.105   0.00435
# Sanity check: binwise-weighted MSE reproduces the overall Chicago RMSE
ggMiniTempCHI %>% 
    summarize(mse=sum(n*err2)/sum(n)) %>% 
    mutate(rmse=sqrt(mse))
## # A tibble: 1 × 2
##      mse  rmse
##    <dbl> <dbl>
## 1 0.0933 0.305
# Actual vs. predicted bin means for Chicago; dashed y=x marks perfect fit
ggMiniTempCHI %>% 
    select(rnd5, temperature_2m, pred) %>%
    pivot_longer(cols=-c(rnd5)) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, 
                  color=c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")[name]
                  )
              ) + 
    labs(title="Actual vs. Predicted Temperature Using NYC/Vegas Random Forest Model on Chicago Data", 
         x="Chicago actual temperature (rounded to nearest 5)", 
         y="Average temperature for metric"
         ) + 
    scale_color_discrete("Metric") + 
    geom_abline(slope=1, intercept=0, lty=2)

As expected, predictions are excellent in the space covered by the training data and poor for the small number of very cold observations never seen in training. Around 60% of MSE in Chicago temperature predictions occurs in the 23 test data observations where temperature (rounded to nearest 5 degrees C) is -20C or colder

The model using only NYC and Las Vegas is applied to Houston, with accuracy explored by temperature:

# Houston errors bucketed by actual temperature rounded to the nearest 5C
# (same construction as the Chicago version above)
ggMiniTempHOU <- tmpPred_v3_df %>% 
    select(src, temperature_2m, pred) %>%
    filter(src=="Houston") %>%
    mutate(err=pred-temperature_2m, 
           err2=err**2, 
           rnd5=round(temperature_2m/5)*5
    ) %>% 
    group_by(rnd5) %>% 
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean)) %>%
    mutate(pcterr2=n*err2/sum(n*err2))
ggMiniTempHOU
## # A tibble: 11 × 7
##     rnd5     n temperature_2m   pred      err    err2  pcterr2
##    <dbl> <dbl>          <dbl>  <dbl>    <dbl>   <dbl>    <dbl>
##  1   -10     2         -8.15  -7.88   0.270   0.103   0.000904
##  2    -5    12         -3.8   -3.57   0.233   0.0711  0.00374 
##  3     0    44          0.448  0.497  0.0490  0.0190  0.00366 
##  4     5   179          5.44   5.43  -0.00674 0.00870 0.00682 
##  5    10   304         10.0   10.0   -0.0150  0.0183  0.0243  
##  6    15   279         15.1   15.1   -0.0164  0.00935 0.0114  
##  7    20   495         20.3   20.2   -0.0439  0.0108  0.0234  
##  8    25   755         25.0   24.8   -0.279   0.164   0.541   
##  9    30   442         29.6   29.4   -0.191   0.0781  0.151   
## 10    35   144         34.4   34.0   -0.415   0.310   0.195   
## 11    40     3         38.0   36.3   -1.71    2.92    0.0383
# Sanity check: binwise-weighted MSE reproduces the overall Houston RMSE
ggMiniTempHOU %>% 
    summarize(mse=sum(n*err2)/sum(n)) %>% 
    mutate(rmse=sqrt(mse))
## # A tibble: 1 × 2
##      mse  rmse
##    <dbl> <dbl>
## 1 0.0859 0.293
# Actual vs. predicted bin means for Houston; dashed y=x marks perfect fit
ggMiniTempHOU %>% 
    select(rnd5, temperature_2m, pred) %>%
    pivot_longer(cols=-c(rnd5)) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, 
                  color=c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")[name]
                  )
              ) + 
    labs(title="Actual vs. Predicted Temperature Using NYC/Vegas Random Forest Model on Houston Data", 
         x="Houston actual temperature (rounded to nearest 5)", 
         y="Average temperature for metric"
         ) + 
    scale_color_discrete("Metric") + 
    geom_abline(slope=1, intercept=0, lty=2)

As expected, predictions are excellent in the space covered by the training data and miss only with the very hottest observations never seen in training

The model using only NYC and Las Vegas is applied to Los Angeles, with accuracy explored by temperature:

# LA errors bucketed by actual temperature rounded to the nearest 5C
# (same construction as the Chicago/Houston versions above)
ggMiniTempLA <- tmpPred_v3_df %>% 
    select(src, temperature_2m, pred) %>%
    filter(src=="LA") %>%
    mutate(err=pred-temperature_2m, 
           err2=err**2, 
           rnd5=round(temperature_2m/5)*5
    ) %>% 
    group_by(rnd5) %>% 
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean)) %>%
    mutate(pcterr2=n*err2/sum(n*err2))
ggMiniTempLA
## # A tibble: 9 × 7
##    rnd5     n temperature_2m  pred      err    err2 pcterr2
##   <dbl> <dbl>          <dbl> <dbl>    <dbl>   <dbl>   <dbl>
## 1     0    11          0.991  1.05  0.0590  0.0195  0.00620
## 2     5   127          5.89   5.87 -0.0172  0.0127  0.0467 
## 3    10   617         10.2   10.3   0.0386  0.0206  0.368  
## 4    15   783         15.0   15.0  -0.0121  0.0107  0.241  
## 5    20   578         19.7   19.7  -0.0220  0.00852 0.142  
## 6    25   309         24.9   24.9  -0.00717 0.00572 0.0511 
## 7    30   199         29.7   29.7  -0.0382  0.00684 0.0394 
## 8    35    48         34.6   34.5  -0.0932  0.0224  0.0311 
## 9    40     5         39.6   39.2  -0.457   0.512   0.0740
# Sanity check: binwise-weighted MSE reproduces the overall LA RMSE
ggMiniTempLA %>% 
    summarize(mse=sum(n*err2)/sum(n)) %>% 
    mutate(rmse=sqrt(mse))
## # A tibble: 1 × 2
##      mse  rmse
##    <dbl> <dbl>
## 1 0.0129 0.114
# Actual vs. predicted bin means for LA; dashed y=x marks perfect fit
ggMiniTempLA %>% 
    select(rnd5, temperature_2m, pred) %>%
    pivot_longer(cols=-c(rnd5)) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, 
                  color=c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")[name]
                  )
              ) + 
    labs(title="Actual vs. Predicted Temperature Using NYC/Vegas Random Forest Model on LA Data", 
         x="Los Angeles actual temperature (rounded to nearest 5)", 
         y="Average temperature for metric"
         ) + 
    scale_color_discrete("Metric") + 
    geom_abline(slope=1, intercept=0, lty=2)

As expected, predictions are excellent since the entire LA space is covered by the training data

An approximate formula for relative humidity is assessed for consistency with the data:

# Approximate relative humidity (%) via a Magnus-type formula
# Source https://www.omnicalculator.com/physics/relative-humidity
# t: air temperature (C); d: dewpoint (C); c1, c2: Magnus coefficients.
# Vectorized over t and d; returns 100 when t == d (saturation).
calcRH <- function(t, d, c1=17.63, c2=243) {
    gamma_dew <- (c1 * d) / (c2 + d)
    gamma_air <- (c1 * t) / (c2 + t)
    100 * exp(gamma_dew - gamma_air)
}

# Applied to sample data
# Formula RH vs. RH reported in the raw data; dashed y=x line plus a per-city
# lm fit show how closely the formula reproduces the reported values
dfTestTemp_v3 %>%
    select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m) %>%
    mutate(crh=calcRH(t, d)) %>%
    ggplot(aes(x=rh, y=crh)) + 
    geom_point(aes(color=src)) + 
    facet_wrap(~src) + 
    geom_smooth(method="lm") + 
    geom_abline(intercept=0, slope=1, lty=2) + 
    labs(x="Reported relative humidity", 
         y="Formula relative humidity", 
         title="Relative humidity by formula from temperature and dewpoint vs. reported in raw data") + 
    scale_color_discrete(NULL)
## `geom_smooth()` using formula = 'y ~ x'

The formula is an exact match to the reported data, allowing the random forest to find the correct third value when given two of T, D, RH, provided that the training space also includes that combination

Example training data is created for all temperatures and dew points between -30 and 50 (rounded to the nearest 1), with RH calculated based on formula:

# Sample dataset
# Synthetic grid of all integer (t, d) pairs in [-30, 50] with d <= t
# (dewpoint cannot exceed temperature); rh derived from the Magnus formula
rhTrain <- expand.grid(t=seq(-30, 50, by=1), d=seq(-30, 50, by=1)) %>% 
    tibble::as_tibble() %>% 
    filter(d<=t) %>% 
    mutate(rh=calcRH(t, d))
rhTrain
## # A tibble: 3,321 × 3
##        t     d    rh
##    <dbl> <dbl> <dbl>
##  1   -30   -30 100  
##  2   -29   -30  91.0
##  3   -28   -30  82.9
##  4   -27   -30  75.6
##  5   -26   -30  69.0
##  6   -25   -30  63.0
##  7   -24   -30  57.6
##  8   -23   -30  52.7
##  9   -22   -30  48.3
## 10   -21   -30  44.2
## # ℹ 3,311 more rows
# Training and testing (mtry=1)
# Predict t from (rh, d) using the synthetic grid as training data.
# bind_rows(.,.,...) replicates the grid 10x so ranger has more rows to
# bootstrap from; mtry=1 forces single-variable splits.
# NOTE(review): refXY and rndTo are runFullRF options defined earlier in the
# file — confirm their semantics there.
rhOut <- rhTrain %>%
    bind_rows(.,.,.,.,.,.,.,.,.,.) %>% 
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("rh", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=1, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m), 
              rndTo=1, 
              returnData=TRUE
              )

## 
## R-squared of test data is: 98.741% (RMSE 1.17 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Keep only the scored test predictions from the runFullRF return list
rhOut <- rhOut$tstPred
rhOut
## # A tibble: 182,635 × 5
##    src       t     d    rh  pred
##    <chr> <dbl> <dbl> <int> <dbl>
##  1 NYC    -1    -1.6    96  1.58
##  2 NYC    -0.8  -1.2    97  1.21
##  3 NYC    -0.7  -1.1    97  1.21
##  4 NYC    -0.6  -1      97  1.21
##  5 NYC     4.8   0.4    73  4.54
##  6 NYC     1.7  -0.4    86  3.84
##  7 NYC    -1.8  -6.2    72 -1.91
##  8 NYC    -2    -9.9    55 -1.50
##  9 NYC    -3.7 -13.1    48 -3.47
## 10 NYC    -8.7 -17.4    49 -7.59
## # ℹ 182,625 more rows
# Errors by city
# e2 = MSE, mu = mean signed error (bias), e2Base = null-model MSE for R^2
rhOut %>%
    group_by(src) %>%
    summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base)
## # A tibble: 5 × 7
##   src        e2      mu     n e2Base  rmse    r2
##   <chr>   <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 1.60  -0.462  36557  125.  1.26  0.987
## 2 Houston 1.95  -0.369  36998   60.4 1.40  0.968
## 3 LA      1.22  -0.322  36972   51.9 1.11  0.976
## 4 NYC     1.63  -0.432  35474  102.  1.28  0.984
## 5 Vegas   0.398  0.0241 36634  110.  0.631 0.996
# Errors by RH
# Same metrics bucketed by reported RH rounded to the nearest 5;
# e2pct = each bucket's share of total squared error
rhOut %>%
    mutate(rh5=round(rh/5)*5) %>%
    group_by(rh5) %>%
    summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
    print(n=25)
## # A tibble: 21 × 8
##      rh5    e2      mu     n e2Base  rmse    r2     e2pct
##    <dbl> <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>     <dbl>
##  1     0 6.57  -2.40       3   74.4 2.56  0.912 0.0000794
##  2     5 1.06  -0.0228  2151   46.2 1.03  0.977 0.00915  
##  3    10 0.462  0.122   6179   61.0 0.680 0.992 0.0115   
##  4    15 0.356  0.236   6865   64.8 0.596 0.995 0.00984  
##  5    20 0.243  0.113   6235   67.6 0.493 0.996 0.00610  
##  6    25 0.190 -0.0796  5763   76.5 0.436 0.998 0.00441  
##  7    30 0.193  0.0212  5964   92.0 0.439 0.998 0.00462  
##  8    35 0.205  0.116   6306  105.  0.453 0.998 0.00521  
##  9    40 0.207  0.146   7230  112.  0.455 0.998 0.00603  
## 10    45 0.192  0.0265  8165  116.  0.438 0.998 0.00632  
## 11    50 0.233 -0.149   9269  113.  0.483 0.998 0.00870  
## 12    55 0.248 -0.0917  9997  114.  0.498 0.998 0.00996  
## 13    60 0.263 -0.0551 10919  112.  0.512 0.998 0.0115   
## 14    65 0.334 -0.109  11278  115.  0.578 0.997 0.0152   
## 15    70 0.454 -0.305  12057  111.  0.674 0.996 0.0220   
## 16    75 0.464 -0.303  12812  108.  0.681 0.996 0.0239   
## 17    80 0.746 -0.536  13248  102.  0.864 0.993 0.0398   
## 18    85 3.09  -1.13   13982   85.8 1.76  0.964 0.174    
## 19    90 6.40  -0.484  15304   69.7 2.53  0.908 0.395    
## 20    95 3.80  -1.10   14419   52.8 1.95  0.928 0.221    
## 21   100 0.897 -0.838   4489   35.9 0.947 0.975 0.0162

Training data rounds temperature to the nearest degree and RH always rounds to the nearest percent, making temperature predictions commonly off by a fraction of a degree. The model is generally accurate, with the exception of very low relative humidities (rounding is much more impactful) and very high relative humidities (mtry=1 creates challenges since grid-based training data overweights some T/D combinations).

The example training data is modified to be more consistent with T/D typically observed:

# Sample of T/D in cities
# Bootstrap ~10% of all-city rows, round t and d to integers, and count
# each (t, d) combination — used below as real-world occurrence weights.
# NOTE(review): 1:nrow(.) would misbehave on an empty frame; seq_len(nrow(.))
# is the safer idiom if this is ever reused
set.seed(24072114)
tdAll <- allCity %>%
    select(t=temperature_2m, d=dewpoint_2m) %>%
    slice(sample(1:nrow(.), round(nrow(.)/10), replace=TRUE)) %>%
    mutate(across(where(is.numeric), .fns=round)) %>%
    count(t, d)
tdAll
## # A tibble: 1,814 × 3
##        t     d     n
##    <dbl> <dbl> <int>
##  1   -30   -34     1
##  2   -29   -34     1
##  3   -26   -32     1
##  4   -26   -30     1
##  5   -25   -29     1
##  6   -24   -28     2
##  7   -24   -27     1
##  8   -22   -28     1
##  9   -22   -26     1
## 10   -22   -25     5
## # ℹ 1,804 more rows
# Examples of real-world occurrence
# Scatter of sampled (d, t) combinations sized by frequency
tdAll %>%
    ggplot(aes(x=d, y=t)) + 
    geom_point(aes(size=n), alpha=0.25) + 
    labs(title="Sample (10%) of 5-city temperature and dew points")

# Training and testing (mtry=1) weighted by real-world occurrence
# Each grid row gets weight = observed count + 5, so every (t, d) combination
# retains a baseline weight of 5 even when never sampled in tdAll
rhOut_wtd <- rhTrain %>%
    left_join(tdAll, by=c("t", "d")) %>%
    mutate(n=ifelse(is.na(n), 5, n+5)) %>%
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("rh", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=1, 
              case.weights="n",
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m), 
              rndTo=1, 
              returnData=TRUE
              )

## 
## R-squared of test data is: 98.678% (RMSE 1.2 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Keep only the scored test predictions from the runFullRF return list
rhOut_wtd <- rhOut_wtd$tstPred
rhOut_wtd
## # A tibble: 182,635 × 5
##    src       t     d    rh  pred
##    <chr> <dbl> <dbl> <int> <dbl>
##  1 NYC    -1    -1.6    96  1.87
##  2 NYC    -0.8  -1.2    97  1.88
##  3 NYC    -0.7  -1.1    97  1.88
##  4 NYC    -0.6  -1      97  1.88
##  5 NYC     4.8   0.4    73  4.93
##  6 NYC     1.7  -0.4    86  3.19
##  7 NYC    -1.8  -6.2    72 -1.47
##  8 NYC    -2    -9.9    55 -1.69
##  9 NYC    -3.7 -13.1    48 -3.25
## 10 NYC    -8.7 -17.4    49 -7.82
## # ℹ 182,625 more rows
# Errors by city
# e2 = MSE, mu = mean signed error (bias), e2Base = null-model MSE for R^2
rhOut_wtd %>%
    group_by(src) %>%
    summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base)
## # A tibble: 5 × 7
##   src        e2      mu     n e2Base  rmse    r2
##   <chr>   <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 1.50  -0.553  36557  125.  1.22  0.988
## 2 Houston 2.23  -0.541  36998   60.4 1.49  0.963
## 3 LA      1.40  -0.467  36972   51.9 1.18  0.973
## 4 NYC     1.69  -0.562  35474  102.  1.30  0.983
## 5 Vegas   0.331 -0.0175 36634  110.  0.575 0.997
# Errors by RH
# Metrics bucketed by reported RH rounded to the nearest 5;
# e2pct = each bucket's share of total squared error
rhOut_wtd %>%
    mutate(rh5=round(rh/5)*5) %>%
    group_by(rh5) %>%
    summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
    print(n=25)
## # A tibble: 21 × 8
##      rh5    e2       mu     n e2Base  rmse    r2     e2pct
##    <dbl> <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>     <dbl>
##  1     0 5.93  -2.24        3   74.4 2.44  0.920 0.0000682
##  2     5 1.04  -0.0480   2151   46.2 1.02  0.978 0.00854  
##  3    10 0.454  0.106    6179   61.0 0.674 0.993 0.0107   
##  4    15 0.301  0.195    6865   64.8 0.548 0.995 0.00791  
##  5    20 0.194  0.0202   6235   67.6 0.440 0.997 0.00463  
##  6    25 0.168 -0.0755   5763   76.5 0.410 0.998 0.00371  
##  7    30 0.161 -0.0931   5964   92.0 0.401 0.998 0.00368  
##  8    35 0.140 -0.00134  6306  105.  0.375 0.999 0.00339  
##  9    40 0.143  0.0280   7230  112.  0.378 0.999 0.00396  
## 10    45 0.140 -0.0439   8165  116.  0.374 0.999 0.00438  
## 11    50 0.158 -0.141    9269  113.  0.397 0.999 0.00561  
## 12    55 0.167 -0.109    9997  114.  0.409 0.999 0.00641  
## 13    60 0.190 -0.152   10919  112.  0.436 0.998 0.00796  
## 14    65 0.246 -0.161   11278  115.  0.496 0.998 0.0107   
## 15    70 0.318 -0.315   12057  111.  0.564 0.997 0.0147   
## 16    75 0.339 -0.374   12812  108.  0.582 0.997 0.0166   
## 17    80 0.555 -0.496   13248  102.  0.745 0.995 0.0282   
## 18    85 1.34  -0.739   13982   85.8 1.16  0.984 0.0721   
## 19    90 5.06  -0.731   15304   69.7 2.25  0.927 0.297    
## 20    95 7.28  -1.84    14419   52.8 2.70  0.862 0.402    
## 21   100 5.09  -2.18     4489   35.9 2.26  0.858 0.0876

The weighted training data performs slightly better for data points with high density, at the cost of somewhat worse performance for less commonly observed relative humidities

Example training data is expanded to include all temperatures and dew points between -50 and 50 (rounded to the nearest 1), with RH calculated based on formula:

# Sample dataset
# Wider synthetic grid: integer (t, d) in [-50, 50] with d <= t, to cover the
# coldest observations missing from the [-30, 50] grid above
rhTrain_ex <- expand.grid(t=seq(-50, 50, by=1), d=seq(-50, 50, by=1)) %>% 
    tibble::as_tibble() %>% 
    filter(d<=t) %>% 
    mutate(rh=calcRH(t, d))
rhTrain_ex
## # A tibble: 5,151 × 3
##        t     d    rh
##    <dbl> <dbl> <dbl>
##  1   -50   -50 100  
##  2   -49   -50  89.2
##  3   -48   -50  79.6
##  4   -47   -50  71.2
##  5   -46   -50  63.7
##  6   -45   -50  57.1
##  7   -44   -50  51.2
##  8   -43   -50  46.0
##  9   -42   -50  41.3
## 10   -41   -50  37.2
## # ℹ 5,141 more rows
# Sample of T/D in cities from previous code section (frame 'tdAll')
# Training and testing (mtry=1) weighted by real-world occurrence
# Same weighting scheme as before (observed count + 5 baseline), applied to
# the expanded [-50, 50] grid
rhOut_wtd_ex <- rhTrain_ex %>%
    left_join(tdAll, by=c("t", "d")) %>%
    mutate(n=ifelse(is.na(n), 5, n+5)) %>%
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("rh", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=1, 
              case.weights="n",
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m), 
              rndTo=1, 
              returnData=TRUE
              )

## 
## R-squared of test data is: 98.789% (RMSE 1.14 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Keep only the scored test predictions from the runFullRF return list
rhOut_wtd_ex <- rhOut_wtd_ex$tstPred
rhOut_wtd_ex
## # A tibble: 182,635 × 5
##    src       t     d    rh  pred
##    <chr> <dbl> <dbl> <int> <dbl>
##  1 NYC    -1    -1.6    96  3.16
##  2 NYC    -0.8  -1.2    97  3.18
##  3 NYC    -0.7  -1.1    97  3.18
##  4 NYC    -0.6  -1      97  3.18
##  5 NYC     4.8   0.4    73  4.78
##  6 NYC     1.7  -0.4    86  2.60
##  7 NYC    -1.8  -6.2    72 -1.38
##  8 NYC    -2    -9.9    55 -1.57
##  9 NYC    -3.7 -13.1    48 -3.27
## 10 NYC    -8.7 -17.4    49 -7.45
## # ℹ 182,625 more rows
# Errors by city
# e2 = MSE, mu = mean signed error (bias), e2Base = null-model MSE for R^2
rhOut_wtd_ex %>%
    group_by(src) %>%
    summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base)
## # A tibble: 5 × 7
##   src        e2      mu     n e2Base  rmse    r2
##   <chr>   <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 1.32  -0.597  36557  125.  1.15  0.989
## 2 Houston 2.28  -0.665  36998   60.4 1.51  0.962
## 3 LA      1.10  -0.399  36972   51.9 1.05  0.979
## 4 NYC     1.53  -0.606  35474  102.  1.24  0.985
## 5 Vegas   0.306 -0.0457 36634  110.  0.553 0.997
# Errors by RH
# Metrics bucketed by reported RH rounded to the nearest 5;
# e2pct = each bucket's share of total squared error
rhOut_wtd_ex %>%
    mutate(rh5=round(rh/5)*5) %>%
    group_by(rh5) %>%
    summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
    print(n=25)
## # A tibble: 21 × 8
##      rh5    e2       mu     n e2Base  rmse    r2     e2pct
##    <dbl> <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>     <dbl>
##  1     0 5.76  -2.24        3   74.4 2.40  0.923 0.0000724
##  2     5 1.01   0.0847   2151   46.2 1.01  0.978 0.00911  
##  3    10 0.417  0.0328   6179   61.0 0.646 0.993 0.0108   
##  4    15 0.245 -0.0138   6865   64.8 0.495 0.996 0.00704  
##  5    20 0.184  0.0197   6235   67.6 0.429 0.997 0.00480  
##  6    25 0.171  0.117    5763   76.5 0.413 0.998 0.00412  
##  7    30 0.157  0.0106   5964   92.0 0.396 0.998 0.00392  
##  8    35 0.139 -0.0553   6306  105.  0.373 0.999 0.00366  
##  9    40 0.143 -0.0675   7230  112.  0.378 0.999 0.00433  
## 10    45 0.132 -0.00440  8165  116.  0.363 0.999 0.00450  
## 11    50 0.157 -0.137    9269  113.  0.396 0.999 0.00609  
## 12    55 0.161 -0.109    9997  114.  0.401 0.999 0.00674  
## 13    60 0.191 -0.166   10919  112.  0.436 0.998 0.00871  
## 14    65 0.261 -0.300   11278  115.  0.511 0.998 0.0123   
## 15    70 0.296 -0.302   12057  111.  0.544 0.997 0.0150   
## 16    75 0.359 -0.349   12812  108.  0.599 0.997 0.0193   
## 17    80 0.451 -0.462   13248  102.  0.672 0.996 0.0250   
## 18    85 1.02  -0.690   13982   85.8 1.01  0.988 0.0597   
## 19    90 3.03  -0.935   15304   69.7 1.74  0.956 0.194    
## 20    95 9.09  -2.20    14419   52.8 3.02  0.828 0.549    
## 21   100 2.75  -1.60     4489   35.9 1.66  0.923 0.0517
# Errors by RH (plotted)
# Same bucketed metrics as the table above, plotted as RMSE and R^2 panels
rhOut_wtd_ex %>%
    mutate(rh5=round(rh/5)*5) %>%
    group_by(rh5) %>%
    summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
    select(rh5, rmse, r2) %>%
    pivot_longer(cols=-c(rh5)) %>%
    ggplot(aes(x=rh5, y=value)) + 
    geom_point(aes(color=name)) + 
    facet_wrap(~name, ncol=1, scales="free_y") + 
    labs(title="R-squared and RMSE of temperature predictions by relative humidity", 
         x="Reported relative humidity (rounded to nearest 5)", 
         y=NULL
         ) + 
    scale_color_discrete(NULL)

The expanded training data improves prediction quality at very low temperatures. Predictions continue to be less accurate at very low, and very high, relative humidities

Rounding is a meaningful challenge for some temperature predictions given training data that rounds temperature and dewpoint to the nearest 1:

# Prediction error summaries (most of the significant errors occur when RH is 90+)
# delta = absolute prediction error; summary() over all test rows
rhOut_wtd_ex %>% mutate(delta=abs(pred-t)) %>% summary()
##      src                  t                d                 rh        
##  Length:182635      Min.   :-31.10   Min.   :-35.400   Min.   :  2.00  
##  Class :character   1st Qu.:  9.10   1st Qu.: -1.300   1st Qu.: 42.00  
##  Mode  :character   Median : 16.90   Median :  7.200   Median : 65.00  
##                     Mean   : 16.22   Mean   :  6.615   Mean   : 61.05  
##                     3rd Qu.: 23.90   3rd Qu.: 14.900   3rd Qu.: 83.00  
##                     Max.   : 45.80   Max.   : 27.200   Max.   :100.00  
##       pred             delta         
##  Min.   :-30.404   Min.   :0.000001  
##  1st Qu.:  9.645   1st Qu.:0.203006  
##  Median : 17.260   Median :0.441588  
##  Mean   : 16.681   Mean   :0.737171  
##  3rd Qu.: 24.339   3rd Qu.:0.866279  
##  Max.   : 45.730   Max.   :5.065160
rhOut_wtd_ex %>% mutate(delta=abs(pred-t)) %>% filter(delta>1.5) %>% summary()
##      src                  t                d                rh       
##  Length:23492       Min.   :-22.20   Min.   :-27.50   Min.   :  2.0  
##  Class :character   1st Qu.:  9.40   1st Qu.:  7.80   1st Qu.: 90.0  
##  Mode  :character   Median : 16.40   Median : 14.80   Median : 94.0  
##                     Mean   : 15.23   Mean   : 13.41   Mean   : 91.5  
##                     3rd Qu.: 21.70   3rd Qu.: 20.40   3rd Qu.: 96.0  
##                     Max.   : 45.10   Max.   : 26.80   Max.   :100.0  
##       pred            delta      
##  Min.   :-20.59   Min.   :1.500  
##  1st Qu.: 11.77   1st Qu.:1.767  
##  Median : 18.45   Median :2.263  
##  Mean   : 17.43   Mean   :2.642  
##  3rd Qu.: 23.59   3rd Qu.:3.584  
##  Max.   : 45.73   Max.   :5.065
# Sample dataset
# Fine temperature grid (0.1C steps) at a few fixed dewpoints, with rounded
# t and rh columns, to study how rounding blurs the T/D/RH relationship
rhTrain_hl <- expand.grid(t=seq(-25, 50, by=0.1), d=seq(-20, 20, by=10)) %>% 
    tibble::as_tibble() %>% 
    filter(d<=t) %>% 
    mutate(rh=calcRH(t, d), rndt=round(t), rndrh=round(rh))
rhTrain_hl
## # A tibble: 2,505 × 5
##        t     d    rh  rndt rndrh
##    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 -20     -20 100     -20   100
##  2 -19.9   -20  99.1   -20    99
##  3 -19.8   -20  98.3   -20    98
##  4 -19.7   -20  97.5   -20    97
##  5 -19.6   -20  96.6   -20    97
##  6 -19.5   -20  95.8   -20    96
##  7 -19.4   -20  95.0   -19    95
##  8 -19.3   -20  94.2   -19    94
##  9 -19.2   -20  93.4   -19    93
## 10 -19.1   -20  92.6   -19    93
## # ℹ 2,495 more rows
# Examples for rndt==d
# Near saturation: rows where rounded temperature equals dewpoint, showing the
# RH spread hidden by rounding
rhTrain_hl %>%
    filter(rndt==d) %>%
    print(n=40)
## # A tibble: 30 × 5
##          t     d    rh  rndt rndrh
##      <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 -20       -20 100     -20   100
##  2 -19.9     -20  99.1   -20    99
##  3 -19.8     -20  98.3   -20    98
##  4 -19.7     -20  97.5   -20    97
##  5 -19.6     -20  96.6   -20    97
##  6 -19.5     -20  95.8   -20    96
##  7 -10       -10 100     -10   100
##  8  -9.9     -10  99.2   -10    99
##  9  -9.8     -10  98.4   -10    98
## 10  -9.7     -10  97.7   -10    98
## 11  -9.6     -10  96.9   -10    97
## 12  -9.5     -10  96.1   -10    96
## 13   0         0 100       0   100
## 14   0.100     0  99.3     0    99
## 15   0.200     0  98.6     0    99
## 16   0.300     0  97.8     0    98
## 17   0.400     0  97.1     0    97
## 18   0.5       0  96.4     0    96
## 19  10        10 100      10   100
## 20  10.1      10  99.3    10    99
## 21  10.2      10  98.7    10    99
## 22  10.3      10  98.0    10    98
## 23  10.4      10  97.4    10    97
## 24  10.5      10  96.7    10    97
## 25  20        20 100      20   100
## 26  20.1      20  99.4    20    99
## 27  20.2      20  98.8    20    99
## 28  20.3      20  98.2    20    98
## 29  20.4      20  97.6    20    98
## 30  20.5      20  97.0    20    97
# Examples for very low rounded RH (rndrh <= 5)
# (Comment corrected: the filter below keeps rndrh <= 5, not rndrh == 1.)
# Temperature range spanned by each (dewpoint, rounded-RH) pair
rhTrain_hl %>%
    filter(rndrh<=5) %>%
    group_by(d, rndrh) %>%
    summarize(maxt=max(t), meant=mean(t), mint=min(t), n=n(), .groups="drop")
## # A tibble: 10 × 6
##        d rndrh  maxt meant  mint     n
##    <dbl> <dbl> <dbl> <dbl> <dbl> <int>
##  1   -20     1  50    46.2  42.4    77
##  2   -20     2  42.3  37.7  33      94
##  3   -20     3  32.9  30.1  27.2    58
##  4   -20     4  27.1  25.1  23      42
##  5   -20     5  22.9  21.3  19.7    33
##  6   -10     2  50    49.2  48.5    16
##  7   -10     3  48.4  45.2  42      65
##  8   -10     4  41.9  39.6  37.3    47
##  9   -10     5  37.2  35.4  33.7    36
## 10     0     5  50    49.0  47.9    22

As temperature and dewpoint converge (high relative humidity), the same rounded value of temperature can occur with RH that spans as much as ~4%. Greater granularity in the training data may help address this. As relative humidity gets very low, a given dewpoint can be associated with over 5 degrees of temperature variation for the same rounded value of RH. Since the raw data has rounded RH, this may be a harder constraint, though extremely low relative humidity is uncommon so this may not be a major driver of overall RMSE

Training data is updated to include 0.2 degree granularity for temperature and dewpoint:

# Sample dataset
# Finer synthetic grid: (t, d) in [-50, 50] at 0.2C granularity with d <= t
rhTrain_02 <- expand.grid(t=seq(-50, 50, by=0.2), d=seq(-50, 50, by=0.2)) %>% 
    tibble::as_tibble() %>% 
    filter(d<=t) %>% 
    mutate(rh=calcRH(t, d))
rhTrain_02
## # A tibble: 125,751 × 3
##        t     d    rh
##    <dbl> <dbl> <dbl>
##  1 -50     -50 100  
##  2 -49.8   -50  97.7
##  3 -49.6   -50  95.5
##  4 -49.4   -50  93.4
##  5 -49.2   -50  91.2
##  6 -49     -50  89.2
##  7 -48.8   -50  87.2
##  8 -48.6   -50  85.2
##  9 -48.4   -50  83.3
## 10 -48.2   -50  81.5
## # ℹ 125,741 more rows
# Training and testing (mtry=1) - NOT weighted by real-world occurrence
# Fit a random forest predicting temperature (t) from RH and dewpoint on the
# synthetic grid, scored against the held-out test rows of allCity.
# NOTE(review): rndTo presumably controls output rounding and refXY the
# reference plot in runFullRF (defined in _v001) — confirm there
rhOut_02 <- rhTrain_02 %>%
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("rh", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=1, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m), 
              rndTo=1, 
              returnData=TRUE
              )

## 
## R-squared of test data is: 99.843% (RMSE 0.41 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
rhOut_02 <- rhOut_02[["tstPred"]]
rhOut_02
## # A tibble: 182,635 × 5
##    src       t     d    rh   pred
##    <chr> <dbl> <dbl> <int>  <dbl>
##  1 NYC    -1    -1.6    96 -0.345
##  2 NYC    -0.8  -1.2    97 -0.405
##  3 NYC    -0.7  -1.1    97 -0.398
##  4 NYC    -0.6  -1      97 -0.311
##  5 NYC     4.8   0.4    73  4.89 
##  6 NYC     1.7  -0.4    86  1.73 
##  7 NYC    -1.8  -6.2    72 -1.60 
##  8 NYC    -2    -9.9    55 -2.07 
##  9 NYC    -3.7 -13.1    48 -3.79 
## 10 NYC    -8.7 -17.4    49 -8.58 
## # ℹ 182,625 more rows
# Errors by city: mean squared error, bias (mu), sample size, and the
# mean-only baseline variance; rmse and r2 derived from those
rhOut_02 %>%
    group_by(src) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src        e2      mu     n e2Base  rmse    r2
##   <chr>   <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 0.120 -0.113  36557  125.  0.346 0.999
## 2 Houston 0.230 -0.155  36998   60.4 0.479 0.996
## 3 LA      0.205 -0.0963 36972   51.9 0.453 0.996
## 4 NYC     0.160 -0.123  35474  102.  0.400 0.998
## 5 Vegas   0.131  0.0466 36634  110.  0.362 0.999
# Errors by RH rounded to the nearest 5%; e2pct is each bucket's share of
# total squared error
rhOut_02 %>%
    mutate(rh5 = round(rh / 5) * 5) %>%
    group_by(rh5) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
    print(n = 25)
## # A tibble: 21 × 8
##      rh5     e2       mu     n e2Base  rmse    r2    e2pct
##    <dbl>  <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>    <dbl>
##  1     0 9.13   -2.74        3   74.4 3.02  0.877 0.000885
##  2     5 0.887  -0.00572  2151   46.2 0.942 0.981 0.0616  
##  3    10 0.273   0.0711   6179   61.0 0.523 0.996 0.0546  
##  4    15 0.121   0.0351   6865   64.8 0.348 0.998 0.0268  
##  5    20 0.0737  0.0802   6235   67.6 0.272 0.999 0.0149  
##  6    25 0.0547  0.105    5763   76.5 0.234 0.999 0.0102  
##  7    30 0.0348  0.0531   5964   92.0 0.187 1.00  0.00670 
##  8    35 0.0272  0.0174   6306  105.  0.165 1.00  0.00554 
##  9    40 0.0231  0.0342   7230  112.  0.152 1.00  0.00541 
## 10    45 0.0253  0.0829   8165  116.  0.159 1.00  0.00667 
## 11    50 0.0187  0.0565   9269  113.  0.137 1.00  0.00561 
## 12    55 0.0173  0.0325   9997  114.  0.131 1.00  0.00557 
## 13    60 0.0139 -0.0159  10919  112.  0.118 1.00  0.00492 
## 14    65 0.0140  0.00561 11278  115.  0.118 1.00  0.00511 
## 15    70 0.0153  0.0111  12057  111.  0.124 1.00  0.00595 
## 16    75 0.0214 -0.0571  12812  108.  0.146 1.00  0.00887 
## 17    80 0.0259 -0.0857  13248  102.  0.161 1.00  0.0111  
## 18    85 0.0329 -0.0999  13982   85.8 0.181 1.00  0.0149  
## 19    90 0.0641 -0.196   15304   69.7 0.253 0.999 0.0317  
## 20    95 0.272  -0.374   14419   52.8 0.522 0.995 0.127   
## 21   100 4.04   -1.87     4489   35.9 2.01  0.888 0.586

The model performs very well, with the exception of some remaining RMSE mainly for very high RH. Allowing both predictors (RH, D) to be used at the same time is forced using mtry=2:

# Same model as rhOut_02 but with mtry=2 so both predictors (rh, d) are
# considered at every split
rhOut_02_mt2 <- rhTrain_02 %>%
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("rh", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=2, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m), 
              rndTo=1, 
              returnData=TRUE
    )
## Growing trees.. Progress: 96%. Estimated remaining time: 1 seconds.

## 
## R-squared of test data is: 99.962% (RMSE 0.2 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
rhOut_02_mt2 <- rhOut_02_mt2[["tstPred"]]
rhOut_02_mt2
## # A tibble: 182,635 × 5
##    src       t     d    rh   pred
##    <chr> <dbl> <dbl> <int>  <dbl>
##  1 NYC    -1    -1.6    96 -0.996
##  2 NYC    -0.8  -1.2    97 -0.820
##  3 NYC    -0.7  -1.1    97 -0.815
##  4 NYC    -0.6  -1      97 -0.625
##  5 NYC     4.8   0.4    73  4.80 
##  6 NYC     1.7  -0.4    86  1.60 
##  7 NYC    -1.8  -6.2    72 -1.80 
##  8 NYC    -2    -9.9    55 -2.20 
##  9 NYC    -3.7 -13.1    48 -3.79 
## 10 NYC    -8.7 -17.4    49 -8.58 
## # ℹ 182,625 more rows
# Errors by city: mean squared error, bias (mu), sample size, and the
# mean-only baseline variance; rmse and r2 derived from those
rhOut_02_mt2 %>%
    group_by(src) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src         e2     mu     n e2Base  rmse    r2
##   <chr>    <dbl>  <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 0.0133 0.0479 36557  125.  0.116 1.00 
## 2 Houston 0.0139 0.0489 36998   60.4 0.118 1.00 
## 3 LA      0.0353 0.0530 36972   51.9 0.188 0.999
## 4 NYC     0.0134 0.0510 35474  102.  0.116 1.00 
## 5 Vegas   0.126  0.0675 36634  110.  0.355 0.999
# Errors by RH rounded to the nearest 5%; e2pct is each bucket's share of
# total squared error
rhOut_02_mt2 %>%
    mutate(rh5 = round(rh / 5) * 5) %>%
    group_by(rh5) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
    print(n = 25)
## # A tibble: 21 × 8
##      rh5      e2      mu     n e2Base   rmse    r2   e2pct
##    <dbl>   <dbl>   <dbl> <int>  <dbl>  <dbl> <dbl>   <dbl>
##  1     0 9.37    -2.76       3   74.4 3.06   0.874 0.00379
##  2     5 0.891   -0.0136  2151   46.2 0.944  0.981 0.259  
##  3    10 0.268    0.0792  6179   61.0 0.518  0.996 0.224  
##  4    15 0.121    0.0835  6865   64.8 0.348  0.998 0.112  
##  5    20 0.0688   0.0788  6235   67.6 0.262  0.999 0.0579 
##  6    25 0.0456   0.0697  5763   76.5 0.214  0.999 0.0355 
##  7    30 0.0334   0.0639  5964   92.0 0.183  1.00  0.0268 
##  8    35 0.0256   0.0624  6306  105.  0.160  1.00  0.0218 
##  9    40 0.0216   0.0598  7230  112.  0.147  1.00  0.0210 
## 10    45 0.0187   0.0585  8165  116.  0.137  1.00  0.0205 
## 11    50 0.0166   0.0552  9269  113.  0.129  1.00  0.0207 
## 12    55 0.0150   0.0559  9997  114.  0.123  1.00  0.0203 
## 13    60 0.0139   0.0526 10919  112.  0.118  1.00  0.0205 
## 14    65 0.0128   0.0511 11278  115.  0.113  1.00  0.0195 
## 15    70 0.0126   0.0527 12057  111.  0.112  1.00  0.0206 
## 16    75 0.0123   0.0488 12812  108.  0.111  1.00  0.0212 
## 17    80 0.0110   0.0431 13248  102.  0.105  1.00  0.0197 
## 18    85 0.0122   0.0495 13982   85.8 0.110  1.00  0.0229 
## 19    90 0.0110   0.0408 15304   69.7 0.105  1.00  0.0227 
## 20    95 0.0130   0.0611 14419   52.8 0.114  1.00  0.0252 
## 21   100 0.00787 -0.0108  4489   35.9 0.0887 1.00  0.00476

The model performs significantly better for very high RH, with the only meaningful errors at low RH where the impact of rounding (raw data RH is reported to the nearest percent) has the greatest impact

The process is run to predict RH based on temperature and dewpoint, starting with mtry=1:

# Invert the relationship: predict RH from temperature and dewpoint, with
# mtry=1 (one predictor considered per split)
predRH_02_mt1 <- rhTrain_02 %>%
    runFullRF(dfTrain=., 
              yVar=c("rh"), 
              xVars=c("t", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=1, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m), 
              rndTo=1, 
              returnData=TRUE
    )

## 
## R-squared of test data is: 99.917% (RMSE 0.75 vs. 26.1 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
predRH_02_mt1 <- predRH_02_mt1[["tstPred"]]
predRH_02_mt1
## # A tibble: 182,635 × 5
##    src       t     d    rh  pred
##    <chr> <dbl> <dbl> <int> <dbl>
##  1 NYC    -1    -1.6    96  93.8
##  2 NYC    -0.8  -1.2    97  95.2
##  3 NYC    -0.7  -1.1    97  95.2
##  4 NYC    -0.6  -1      97  94.7
##  5 NYC     4.8   0.4    73  72.9
##  6 NYC     1.7  -0.4    86  85.9
##  7 NYC    -1.8  -6.2    72  71.8
##  8 NYC    -2    -9.9    55  54.4
##  9 NYC    -3.7 -13.1    48  48.1
## 10 NYC    -8.7 -17.4    49  49.6
## # ℹ 182,625 more rows
# Errors by city for the RH predictions: squared error, bias, size, and the
# mean-only baseline variance; rmse and r2 derived from those
predRH_02_mt1 %>%
    group_by(src) %>%
    summarize(
        e2 = mean((rh - pred)^2),
        mu = mean(rh - pred),
        n = n(),
        e2Base = mean((rh - mean(rh))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src        e2     mu     n e2Base  rmse    r2
##   <chr>   <dbl>  <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 0.657 0.404  36557   240. 0.811 0.997
## 2 Houston 0.702 0.470  36998   341. 0.838 0.998
## 3 LA      0.652 0.323  36972   665. 0.807 0.999
## 4 NYC     0.672 0.409  35474   321. 0.820 0.998
## 5 Vegas   0.161 0.0693 36634   376. 0.402 1.00
# Errors by RH rounded to the nearest 5%; e2pct is each bucket's share of
# total squared error
predRH_02_mt1 %>%
    mutate(rh5 = round(rh / 5) * 5) %>%
    group_by(rh5) %>%
    summarize(
        e2 = mean((rh - pred)^2),
        mu = mean(rh - pred),
        n = n(),
        e2Base = mean((rh - mean(rh))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
    print(n = 25)
## # A tibble: 21 × 8
##      rh5     e2       mu     n e2Base  rmse        r2      e2pct
##    <dbl>  <dbl>    <dbl> <int>  <dbl> <dbl>     <dbl>      <dbl>
##  1     0 0.208  -0.429       3  0     0.456 -Inf      0.00000600
##  2     5 0.0972 -0.0812   2151  1.27  0.312    0.924  0.00201   
##  3    10 0.0957 -0.00871  6179  1.94  0.309    0.951  0.00569   
##  4    15 0.107   0.0250   6865  2.00  0.327    0.947  0.00705   
##  5    20 0.109   0.0178   6235  2.01  0.330    0.946  0.00653   
##  6    25 0.115   0.0279   5763  1.99  0.340    0.942  0.00641   
##  7    30 0.135   0.0493   5964  2.05  0.367    0.934  0.00774   
##  8    35 0.150   0.0690   6306  2.00  0.387    0.925  0.00910   
##  9    40 0.158   0.0293   7230  1.98  0.398    0.920  0.0110    
## 10    45 0.160   0.0107   8165  1.99  0.400    0.919  0.0126    
## 11    50 0.179   0.0272   9269  2.00  0.423    0.910  0.0160    
## 12    55 0.202   0.0662   9997  2.01  0.450    0.899  0.0195    
## 13    60 0.235   0.136   10919  2.01  0.485    0.883  0.0247    
## 14    65 0.279   0.180   11278  2.01  0.528    0.861  0.0303    
## 15    70 0.294   0.170   12057  1.99  0.542    0.852  0.0341    
## 16    75 0.317   0.222   12812  2.03  0.563    0.844  0.0391    
## 17    80 0.406   0.330   13248  1.97  0.637    0.794  0.0518    
## 18    85 0.538   0.470   13982  2.00  0.733    0.732  0.0724    
## 19    90 0.834   0.727   15304  1.97  0.913    0.578  0.123     
## 20    95 1.84    1.23    14419  1.93  1.36     0.0451 0.256     
## 21   100 6.14    2.40     4489  0.554 2.48   -10.1    0.265

The model is inaccurate at high relative humidities but otherwise predicts RH accurately, consistent with the known formula.

The process is updated to predict RH based on temperature and dewpoint with mtry=2:

# Same RH-prediction model as predRH_02_mt1 but with mtry=2 so both
# predictors (t, d) are considered at every split
predRH_02_mt2 <- rhTrain_02 %>%
    runFullRF(dfTrain=., 
              yVar=c("rh"), 
              xVars=c("t", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=2, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m), 
              rndTo=1, 
              returnData=TRUE
    )

## 
## R-squared of test data is: 99.97% (RMSE 0.45 vs. 26.1 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
predRH_02_mt2 <- predRH_02_mt2[["tstPred"]]
predRH_02_mt2
## # A tibble: 182,635 × 5
##    src       t     d    rh  pred
##    <chr> <dbl> <dbl> <int> <dbl>
##  1 NYC    -1    -1.6    96  95.7
##  2 NYC    -0.8  -1.2    97  97.0
##  3 NYC    -0.7  -1.1    97  97.0
##  4 NYC    -0.6  -1      97  97.1
##  5 NYC     4.8   0.4    73  73.3
##  6 NYC     1.7  -0.4    86  86.5
##  7 NYC    -1.8  -6.2    72  71.9
##  8 NYC    -2    -9.9    55  54.4
##  9 NYC    -3.7 -13.1    48  48.1
## 10 NYC    -8.7 -17.4    49  49.8
## # ℹ 182,625 more rows
# Errors by city for the RH predictions: squared error, bias, size, and the
# mean-only baseline variance; rmse and r2 derived from those
predRH_02_mt2 %>%
    group_by(src) %>%
    summarize(
        e2 = mean((rh - pred)^2),
        mu = mean(rh - pred),
        n = n(),
        e2Base = mean((rh - mean(rh))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src        e2     mu     n e2Base  rmse    r2
##   <chr>   <dbl>  <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 0.255 0.0224 36557   240. 0.505 0.999
## 2 Houston 0.221 0.0247 36998   341. 0.470 0.999
## 3 LA      0.197 0.0364 36972   665. 0.444 1.00 
## 4 NYC     0.229 0.0228 35474   321. 0.478 0.999
## 5 Vegas   0.117 0.0269 36634   376. 0.342 1.00
# Errors by RH rounded to the nearest 5%; e2pct is each bucket's share of
# total squared error
predRH_02_mt2 %>%
    mutate(rh5 = round(rh / 5) * 5) %>%
    group_by(rh5) %>%
    summarize(
        e2 = mean((rh - pred)^2),
        mu = mean(rh - pred),
        n = n(),
        e2Base = mean((rh - mean(rh))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
    print(n = 25)
## # A tibble: 21 × 8
##      rh5     e2       mu     n e2Base  rmse       r2     e2pct
##    <dbl>  <dbl>    <dbl> <int>  <dbl> <dbl>    <dbl>     <dbl>
##  1     0 0.145  -0.344       3  0     0.381 -Inf     0.0000117
##  2     5 0.0834 -0.0197   2151  1.27  0.289    0.935 0.00482  
##  3    10 0.0863  0.0137   6179  1.94  0.294    0.956 0.0143   
##  4    15 0.0904  0.0286   6865  2.00  0.301    0.955 0.0167   
##  5    20 0.0961  0.0328   6235  2.01  0.310    0.952 0.0161   
##  6    25 0.100   0.0269   5763  1.99  0.317    0.950 0.0155   
##  7    30 0.110   0.0271   5964  2.05  0.331    0.947 0.0176   
##  8    35 0.115   0.0245   6306  2.00  0.340    0.942 0.0196   
##  9    40 0.128   0.0227   7230  1.98  0.358    0.935 0.0250   
## 10    45 0.138   0.0213   8165  1.99  0.372    0.930 0.0304   
## 11    50 0.150   0.0185   9269  2.00  0.388    0.925 0.0374   
## 12    55 0.166   0.0229   9997  2.01  0.407    0.918 0.0445   
## 13    60 0.182   0.0204  10919  2.01  0.427    0.909 0.0534   
## 14    65 0.198   0.0199  11278  2.01  0.445    0.902 0.0600   
## 15    70 0.223   0.00685 12057  1.99  0.472    0.888 0.0721   
## 16    75 0.239   0.00575 12812  2.03  0.489    0.882 0.0823   
## 17    80 0.267   0.0121  13248  1.97  0.516    0.865 0.0950   
## 18    85 0.288   0.00731 13982  2.00  0.537    0.856 0.108    
## 19    90 0.295   0.0104  15304  1.97  0.543    0.850 0.121    
## 20    95 0.301   0.0299  14419  1.93  0.549    0.844 0.117    
## 21   100 0.403   0.393    4489  0.554 0.634    0.273 0.0486

The model is now accurate even at high relative humidities

An approximate formula for vapor pressure deficit is assessed for consistency with the reported data:

# Approximate formula for vapor pressure deficit
# Source https://pulsegrow.com/blogs/learn/vpd
#
# t, d: temperature and dewpoint (deg C); c1-c3: Magnus-type saturation
# vapor pressure constants. Returns VPD in kPa. Relies on calcRH() defined
# earlier in this analysis.
calcVPD <- function(t, d, c1=610.78, c2=17.2694, c3=237.3) {
    # SVP (saturation vapor pressure, Pa) = c1 * exp(t * c2 / (t + c3));
    # VPD = (1 - RH/100) * SVP, then /1000 to convert Pa -> kPa
    deficitFrac <- 1 - calcRH(t, d)/100
    deficitFrac * c1 * exp(t * c2 / (t + c3)) / 1000
}

# Applied to sample data: compare formula VPD (cvpd) against the reported
# vapor_pressure_deficit column (v) for each city; the dashed line marks
# perfect agreement (y = x)
dfTestTemp_v3 %>%
    select(src, t=temperature_2m, d=dewpoint_2m, v=vapor_pressure_deficit) %>%
    mutate(cvpd=calcVPD(t, d)) %>%
    ggplot(aes(x=v, y=cvpd)) + 
    geom_point(aes(color=src)) + 
    facet_wrap(~src) + 
    geom_smooth(method="lm") + 
    geom_abline(intercept=0, slope=1, lty=2) + # y = x reference line
    labs(x="Reported vapor pressure deficit (kPa)", 
         y="Formula vapor pressure deficit (kPa)", 
         title="Vapor pressure deficit by formula from temperature and dewpoint vs. reported in raw data") + 
    scale_color_discrete(NULL)
## `geom_smooth()` using formula = 'y ~ x'

The formula is a strong match to the reported data, which should allow the random forest to recover the correct third value when given two of T, D, and VPD (provided that the training space also includes that combination).

Example training data is created for all temperatures and dew points between -50 and 50 (rounded to the nearest 1), with VPD calculated based on formula:

# Sample dataset: (t, d) grid at 1-degree resolution, keeping only the
# physically valid half where dewpoint does not exceed temperature
rhTrainVPD <- local({
    grid_vals <- seq(-50, 50, by = 1)
    expand.grid(t = grid_vals, d = grid_vals)
}) %>%
    tibble::as_tibble() %>%
    filter(d <= t) %>%
    mutate(vpd = calcVPD(t, d))
rhTrainVPD
## # A tibble: 5,151 × 3
##        t     d      vpd
##    <dbl> <dbl>    <dbl>
##  1   -50   -50 0       
##  2   -49   -50 0.000738
##  3   -48   -50 0.00156 
##  4   -47   -50 0.00247 
##  5   -46   -50 0.00348 
##  6   -45   -50 0.00461 
##  7   -44   -50 0.00585 
##  8   -43   -50 0.00722 
##  9   -42   -50 0.00874 
## 10   -41   -50 0.0104  
## # ℹ 5,141 more rows
# Training and testing (mtry=1)
# Predict temperature from VPD and dewpoint. bind_rows(.,.,...) stacks 10
# copies of the training grid — presumably to upweight the synthetic grid in
# bootstrap sampling; confirm intent against runFullRF in _v001
rhOutVPD <- rhTrainVPD %>%
    bind_rows(.,.,.,.,.,.,.,.,.,.) %>% 
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("vpd", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=1, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit), 
              rndTo=1, 
              returnData=TRUE
              )

## 
## R-squared of test data is: 99.667% (RMSE 0.6 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
rhOutVPD <- rhOutVPD[["tstPred"]]
rhOutVPD
## # A tibble: 182,635 × 5
##    src       t     d   vpd  pred
##    <chr> <dbl> <dbl> <dbl> <dbl>
##  1 NYC    -1    -1.6  0.02 -2.30
##  2 NYC    -0.8  -1.2  0.02 -1.54
##  3 NYC    -0.7  -1.1  0.02 -1.54
##  4 NYC    -0.6  -1    0.02 -1.54
##  5 NYC     4.8   0.4  0.23  4.53
##  6 NYC     1.7  -0.4  0.1   2.81
##  7 NYC    -1.8  -6.2  0.15 -1.17
##  8 NYC    -2    -9.9  0.24 -1.74
##  9 NYC    -3.7 -13.1  0.24 -3.20
## 10 NYC    -8.7 -17.4  0.16 -8.20
## # ℹ 182,625 more rows
# Errors by city: mean squared error, bias (mu), sample size, and the
# mean-only baseline variance; rmse and r2 derived from those
rhOutVPD %>%
    group_by(src) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src         e2      mu     n e2Base  rmse    r2
##   <chr>    <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 0.291  -0.120  36557  125.  0.539 0.998
## 2 Houston 0.720   0.161  36998   60.4 0.849 0.988
## 3 LA      0.354   0.0313 36972   51.9 0.595 0.993
## 4 NYC     0.362  -0.0388 35474  102.  0.601 0.996
## 5 Vegas   0.0710 -0.0715 36634  110.  0.267 0.999
# Errors on a variable-width VPD grid: 0.05 steps below 0.4, 0.2 steps
# below 2, whole units above; e2pct is each bucket's share of total error
rhOutVPD %>%
    mutate(vpd_rnd = case_when(
        vpd < 0.4 ~ round(vpd * 20) / 20,
        vpd < 2   ~ round(vpd * 5) / 5,
        TRUE      ~ round(vpd)
    )) %>%
    group_by(vpd_rnd) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
    print(n = 30)
## # A tibble: 24 × 8
##    vpd_rnd     e2       mu     n  e2Base  rmse    r2     e2pct
##      <dbl>  <dbl>    <dbl> <int>   <dbl> <dbl> <dbl>     <dbl>
##  1    0    2.29    0.746    3081  51.6   1.51  0.956 0.107    
##  2    0.05 2.41    0.791   10402 105.    1.55  0.977 0.381    
##  3    0.1  1.28    0.0988  12935  94.8   1.13  0.986 0.253    
##  4    0.15 0.371  -0.0862  11789  89.5   0.609 0.996 0.0665   
##  5    0.2  0.190  -0.153   10651  83.5   0.436 0.998 0.0308   
##  6    0.25 0.122  -0.162    9233  74.7   0.349 0.998 0.0171   
##  7    0.3  0.114  -0.107    8018  66.5   0.338 0.998 0.0139   
##  8    0.35 0.110  -0.110    6911  59.1   0.331 0.998 0.0115   
##  9    0.4  0.0856 -0.0950  15064  50.3   0.293 0.998 0.0196   
## 10    0.6  0.0778 -0.0188  16435  41.9   0.279 0.998 0.0194   
## 11    0.8  0.0630  0.00694 13819  36.6   0.251 0.998 0.0132   
## 12    1    0.134  -0.251    9892  31.8   0.366 0.996 0.0201   
## 13    1.2  0.0941 -0.217    8747  28.5   0.307 0.997 0.0125   
## 14    1.4  0.0533 -0.119    6520  24.7   0.231 0.998 0.00528  
## 15    1.6  0.0420 -0.0535   5911  21.4   0.205 0.998 0.00378  
## 16    1.8  0.0542  0.0862   4359  18.9   0.233 0.997 0.00359  
## 17    2    0.0723  0.0217  10158  14.5   0.269 0.995 0.0112   
## 18    3    0.0431 -0.0900   9263   9.29  0.207 0.995 0.00606  
## 19    4    0.0370 -0.0382   4687   4.41  0.192 0.992 0.00264  
## 20    5    0.0300  0.0323   2502   2.02  0.173 0.985 0.00114  
## 21    6    0.0239  0.0171   1415   1.23  0.155 0.981 0.000514 
## 22    7    0.0427  0.137     605   0.766 0.207 0.944 0.000393 
## 23    8    0.0517  0.176     205   0.547 0.227 0.906 0.000161 
## 24    9    0.0975  0.266      33   0.288 0.312 0.661 0.0000489

Training data rounds temperature to the nearest degree, making temperature predictions commonly off by a fraction of a degree. The model is generally accurate, with the exception of very low/high vapor pressure deficits.

The model is updated to use mtry=2:

# Training and testing (mtry=2)
# Same VPD-based temperature model, with both predictors available at every
# split. NOTE(review): rndTo=2 here vs rndTo=1 in the mtry=1 run — confirm
# against runFullRF in _v001 whether this difference is intentional
rhOutVPD_mt2 <- rhTrainVPD %>%
    bind_rows(.,.,.,.,.,.,.,.,.,.) %>% 
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("vpd", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=2, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit), 
              rndTo=2, 
              returnData=TRUE
              )

## 
## R-squared of test data is: 99.931% (RMSE 0.27 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
rhOutVPD_mt2 <- rhOutVPD_mt2[["tstPred"]]
rhOutVPD_mt2
## # A tibble: 182,635 × 5
##    src       t     d   vpd  pred
##    <chr> <dbl> <dbl> <dbl> <dbl>
##  1 NYC    -1    -1.6  0.02 -2.06
##  2 NYC    -0.8  -1.2  0.02 -1   
##  3 NYC    -0.7  -1.1  0.02 -1   
##  4 NYC    -0.6  -1    0.02 -1   
##  5 NYC     4.8   0.4  0.23  4.54
##  6 NYC     1.7  -0.4  0.1   2.44
##  7 NYC    -1.8  -6.2  0.15 -1.54
##  8 NYC    -2    -9.9  0.24 -2.19
##  9 NYC    -3.7 -13.1  0.24 -3.82
## 10 NYC    -8.7 -17.4  0.16 -8.58
## # ℹ 182,625 more rows
# Errors by city: mean squared error, bias (mu), sample size, and the
# mean-only baseline variance; rmse and r2 derived from those
rhOutVPD_mt2 %>%
    group_by(src) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src         e2      mu     n e2Base  rmse    r2
##   <chr>    <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1 Chicago 0.0807 0.0556  36557  125.  0.284 0.999
## 2 Houston 0.0930 0.0725  36998   60.4 0.305 0.998
## 3 LA      0.0664 0.0503  36972   51.9 0.258 0.999
## 4 NYC     0.0804 0.0616  35474  102.  0.284 0.999
## 5 Vegas   0.0527 0.00782 36634  110.  0.230 1.00
# Errors on a variable-width VPD grid: 0.05 steps below 0.4, 0.2 steps
# below 2, whole units above; e2pct is each bucket's share of total error
rhOutVPD_mt2 %>%
    mutate(vpd_rnd = case_when(
        vpd < 0.4 ~ round(vpd * 20) / 20,
        vpd < 2   ~ round(vpd * 5) / 5,
        TRUE      ~ round(vpd)
    )) %>%
    group_by(vpd_rnd) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
    print(n = 30)
## # A tibble: 24 × 8
##    vpd_rnd     e2         mu     n  e2Base  rmse    r2    e2pct
##      <dbl>  <dbl>      <dbl> <int>   <dbl> <dbl> <dbl>    <dbl>
##  1    0    0.135   0.189      3081  51.6   0.368 0.997 0.0306  
##  2    0.05 0.176   0.211     10402 105.    0.420 0.998 0.134   
##  3    0.1  0.127   0.0873    12935  94.8   0.356 0.999 0.120   
##  4    0.15 0.114   0.0702    11789  89.5   0.337 0.999 0.0985  
##  5    0.2  0.0873  0.0103    10651  83.5   0.295 0.999 0.0682  
##  6    0.25 0.0769  0.0377     9233  74.7   0.277 0.999 0.0521  
##  7    0.3  0.0769  0.0509     8018  66.5   0.277 0.999 0.0452  
##  8    0.35 0.0741  0.0363     6911  59.1   0.272 0.999 0.0376  
##  9    0.4  0.0625  0.0319    15064  50.3   0.250 0.999 0.0690  
## 10    0.6  0.0525  0.0446    16435  41.9   0.229 0.999 0.0634  
## 11    0.8  0.0474  0.0339    13819  36.6   0.218 0.999 0.0480  
## 12    1    0.0445  0.0357     9892  31.8   0.211 0.999 0.0323  
## 13    1.2  0.0379  0.0244     8747  28.5   0.195 0.999 0.0243  
## 14    1.4  0.0341  0.0283     6520  24.7   0.185 0.999 0.0163  
## 15    1.6  0.0351  0.0364     5911  21.4   0.187 0.998 0.0152  
## 16    1.8  0.0472  0.0317     4359  18.9   0.217 0.998 0.0151  
## 17    2    0.0426  0.0236    10158  14.5   0.206 0.997 0.0318  
## 18    3    0.0588  0.0210     9263   9.29  0.242 0.994 0.0399  
## 19    4    0.0723  0.00279    4687   4.41  0.269 0.984 0.0249  
## 20    5    0.0849 -0.0209     2502   2.02  0.291 0.958 0.0156  
## 21    6    0.0986  0.00277    1415   1.23  0.314 0.920 0.0102  
## 22    7    0.112   0.0000231   605   0.766 0.335 0.853 0.00499 
## 23    8    0.104   0.0297      205   0.547 0.323 0.810 0.00157 
## 24    9    0.233   0.170        33   0.288 0.483 0.190 0.000564

The model is more accurate, particularly for very low vapor pressure deficits

Training data is updated to include 0.2 degree granularity for temperature and dewpoint:

# Sample dataset: (t, d) grid at 0.2-degree resolution, keeping only the
# physically valid half where dewpoint does not exceed temperature
rhTrainVPD_02 <- local({
    grid_vals <- seq(-50, 50, by = 0.2)
    expand.grid(t = grid_vals, d = grid_vals)
}) %>%
    tibble::as_tibble() %>%
    filter(d <= t) %>%
    mutate(vpd = calcVPD(t, d))
rhTrainVPD_02
## # A tibble: 125,751 × 3
##        t     d      vpd
##    <dbl> <dbl>    <dbl>
##  1 -50     -50 0       
##  2 -49.8   -50 0.000141
##  3 -49.6   -50 0.000286
##  4 -49.4   -50 0.000433
##  5 -49.2   -50 0.000584
##  6 -49     -50 0.000738
##  7 -48.8   -50 0.000895
##  8 -48.6   -50 0.00106 
##  9 -48.4   -50 0.00122 
## 10 -48.2   -50 0.00139 
## # ℹ 125,741 more rows
# Training and testing (mtry=2)
# Finest-granularity VPD model: 0.2-degree grid, replicated 10x via
# bind_rows, both predictors available at every split. This is the slowest
# fit in the document (see progress messages below)
rhOutVPD_02_mt2 <- rhTrainVPD_02 %>%
    bind_rows(.,.,.,.,.,.,.,.,.,.) %>% 
    runFullRF(dfTrain=., 
              yVar=c("t"), 
              xVars=c("vpd", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=2, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit), 
              rndTo=2, 
              returnData=TRUE
              )
## Growing trees.. Progress: 8%. Estimated remaining time: 5 minutes, 58 seconds.
## Growing trees.. Progress: 18%. Estimated remaining time: 5 minutes, 14 seconds.
## Growing trees.. Progress: 25%. Estimated remaining time: 5 minutes, 3 seconds.
## Growing trees.. Progress: 32%. Estimated remaining time: 4 minutes, 31 seconds.
## Growing trees.. Progress: 41%. Estimated remaining time: 3 minutes, 56 seconds.
## Growing trees.. Progress: 49%. Estimated remaining time: 3 minutes, 18 seconds.
## Growing trees.. Progress: 57%. Estimated remaining time: 2 minutes, 47 seconds.
## Growing trees.. Progress: 66%. Estimated remaining time: 2 minutes, 12 seconds.
## Growing trees.. Progress: 74%. Estimated remaining time: 1 minute, 40 seconds.
## Growing trees.. Progress: 82%. Estimated remaining time: 1 minute, 11 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 36 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 4 seconds.

## 
## R-squared of test data is: 99.995% (RMSE 0.07 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
rhOutVPD_02_mt2 <- rhOutVPD_02_mt2[["tstPred"]]
rhOutVPD_02_mt2
## # A tibble: 182,635 × 5
##    src       t     d   vpd   pred
##    <chr> <dbl> <dbl> <dbl>  <dbl>
##  1 NYC    -1    -1.6  0.02 -1.11 
##  2 NYC    -0.8  -1.2  0.02 -0.700
##  3 NYC    -0.7  -1.1  0.02 -0.699
##  4 NYC    -0.6  -1    0.02 -0.490
##  5 NYC     4.8   0.4  0.23  4.80 
##  6 NYC     1.7  -0.4  0.1   1.70 
##  7 NYC    -1.8  -6.2  0.15 -1.80 
##  8 NYC    -2    -9.9  0.24 -2.00 
##  9 NYC    -3.7 -13.1  0.24 -3.79 
## 10 NYC    -8.7 -17.4  0.16 -8.72 
## # ℹ 182,625 more rows
# Errors by city: mean squared error, bias (mu), sample size, and the
# mean-only baseline variance; rmse and r2 derived from those
rhOutVPD_02_mt2 %>%
    group_by(src) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src          e2       mu     n e2Base   rmse    r2
##   <chr>     <dbl>    <dbl> <int>  <dbl>  <dbl> <dbl>
## 1 Chicago 0.00844  0.0298  36557  125.  0.0919  1.00
## 2 Houston 0.00585  0.0364  36998   60.4 0.0765  1.00
## 3 LA      0.00441  0.0264  36972   51.9 0.0664  1.00
## 4 NYC     0.00674  0.0329  35474  102.  0.0821  1.00
## 5 Vegas   0.00242 -0.00111 36634  110.  0.0492  1.00
# Errors on a variable-width VPD grid: 0.05 steps below 0.4, 0.2 steps
# below 2, whole units above; e2pct is each bucket's share of total error
rhOutVPD_02_mt2 %>%
    mutate(vpd_rnd = case_when(
        vpd < 0.4 ~ round(vpd * 20) / 20,
        vpd < 2   ~ round(vpd * 5) / 5,
        TRUE      ~ round(vpd)
    )) %>%
    group_by(vpd_rnd) %>%
    summarize(
        e2 = mean((t - pred)^2),
        mu = mean(t - pred),
        n = n(),
        e2Base = mean((t - mean(t))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
    print(n = 30)
## # A tibble: 24 × 8
##    vpd_rnd      e2        mu     n  e2Base   rmse    r2     e2pct
##      <dbl>   <dbl>     <dbl> <int>   <dbl>  <dbl> <dbl>     <dbl>
##  1    0    0.0157   0.0784    3081  51.6   0.125  1.00  0.0477   
##  2    0.05 0.0136   0.0436   10402 105.    0.117  1.00  0.139    
##  3    0.1  0.0103   0.0386   12935  94.8   0.101  1.00  0.131    
##  4    0.15 0.00893  0.0353   11789  89.5   0.0945 1.00  0.104    
##  5    0.2  0.00783  0.0329   10651  83.5   0.0885 1.00  0.0821   
##  6    0.25 0.00689  0.0305    9233  74.7   0.0830 1.00  0.0626   
##  7    0.3  0.00621  0.0292    8018  66.5   0.0788 1.00  0.0490   
##  8    0.35 0.00570  0.0291    6911  59.1   0.0755 1.00  0.0388   
##  9    0.4  0.00508  0.0273   15064  50.3   0.0713 1.00  0.0754   
## 10    0.6  0.00436  0.0247   16435  41.9   0.0661 1.00  0.0706   
## 11    0.8  0.00364  0.0216   13819  36.6   0.0603 1.00  0.0495   
## 12    1    0.00322  0.0186    9892  31.8   0.0568 1.00  0.0314   
## 13    1.2  0.00294  0.0173    8747  28.5   0.0542 1.00  0.0253   
## 14    1.4  0.00264  0.0169    6520  24.7   0.0514 1.00  0.0169   
## 15    1.6  0.00259  0.0159    5911  21.4   0.0509 1.00  0.0151   
## 16    1.8  0.00243  0.0149    4359  18.9   0.0493 1.00  0.0104   
## 17    2    0.00210  0.0119   10158  14.5   0.0458 1.00  0.0210   
## 18    3    0.00164  0.00576   9263   9.29  0.0405 1.00  0.0150   
## 19    4    0.00143 -0.000393  4687   4.41  0.0378 1.00  0.00658  
## 20    5    0.00163 -0.00335   2502   2.02  0.0404 0.999 0.00402  
## 21    6    0.00212 -0.0104    1415   1.23  0.0460 0.998 0.00295  
## 22    7    0.00245 -0.0137     605   0.766 0.0495 0.997 0.00146  
## 23    8    0.00236 -0.0116     205   0.547 0.0486 0.996 0.000477 
## 24    9    0.00263 -0.0169      33   0.288 0.0513 0.991 0.0000855

Predictions become extremely accurate, at the expense of long run times

The model is run to predict VPD as f(T, D):

# Training and testing (mtry=2)
# Predict VPD from temperature and dewpoint. The 10x replication used in
# earlier fits is deliberately disabled here (commented out); rndTo is much
# finer (0.025) to match VPD's smaller scale in kPa
predVPD_02_mt2 <- rhTrainVPD_02 %>%
    # bind_rows(.,.,.,.,.,.,.,.,.,.) %>%
    runFullRF(dfTrain=., 
              yVar=c("vpd"), 
              xVars=c("t", "d"), 
              isContVar=TRUE, 
              refXY=TRUE, 
              mtry=2, 
              dfTest=allCity %>%
                  filter(tt=="test") %>%
                  select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit), 
              rndTo=0.025, 
              returnData=TRUE
              )

## 
## R-squared of test data is: 99.991% (RMSE 0.01 vs. 1.2 null)
## `geom_smooth()` using formula = 'y ~ x'

# Retain only the test-set prediction table from the fitted model output
predVPD_02_mt2 <- predVPD_02_mt2[["tstPred"]]
predVPD_02_mt2
## # A tibble: 182,635 × 5
##    src       t     d   vpd   pred
##    <chr> <dbl> <dbl> <dbl>  <dbl>
##  1 NYC    -1    -1.6  0.02 0.0247
##  2 NYC    -0.8  -1.2  0.02 0.0167
##  3 NYC    -0.7  -1.1  0.02 0.0167
##  4 NYC    -0.6  -1    0.02 0.0178
##  5 NYC     4.8   0.4  0.23 0.232 
##  6 NYC     1.7  -0.4  0.1  0.0931
##  7 NYC    -1.8  -6.2  0.15 0.151 
##  8 NYC    -2    -9.9  0.24 0.241 
##  9 NYC    -3.7 -13.1  0.24 0.239 
## 10 NYC    -8.7 -17.4  0.16 0.158 
## # ℹ 182,625 more rows
# Errors by city for the VPD predictions: squared error, bias, size, and the
# mean-only baseline variance; rmse and r2 derived from those
predVPD_02_mt2 %>%
    group_by(src) %>%
    summarize(
        e2 = mean((vpd - pred)^2),
        mu = mean(vpd - pred),
        n = n(),
        e2Base = mean((vpd - mean(vpd))^2)
    ) %>%
    mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
##   src            e2      mu     n e2Base    rmse    r2
##   <chr>       <dbl>   <dbl> <int>  <dbl>   <dbl> <dbl>
## 1 Chicago 0.0000808 0.00126 36557  0.248 0.00899  1.00
## 2 Houston 0.000159  0.00166 36998  0.590 0.0126   1.00
## 3 LA      0.000113  0.00305 36972  1.07  0.0106   1.00
## 4 NYC     0.0000790 0.00119 35474  0.298 0.00889  1.00
## 5 Vegas   0.000236  0.00865 36634  2.93  0.0154   1.00
# Errors by VPD level, with coarser rounding as VPD grows
# (0.05 steps below 0.4, 0.2 steps below 2, whole numbers above)
predVPD_02_mt2 %>%
    mutate(vpd_rnd=case_when(
        vpd<0.4 ~ round(vpd*20)/20,
        vpd<2   ~ round(vpd*5)/5,
        .default=round(vpd)
    )) %>%
    group_by(vpd_rnd) %>%
    summarize(
        e2=mean((vpd-pred)^2),
        mu=mean(vpd-pred),
        n=n(),
        e2Base=mean((vpd-mean(vpd))^2)
    ) %>%
    mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
    print(n=30)
## # A tibble: 24 × 8
##    vpd_rnd        e2        mu     n    e2Base    rmse     r2   e2pct
##      <dbl>     <dbl>     <dbl> <int>     <dbl>   <dbl>  <dbl>   <dbl>
##  1    0    0.0000998 -0.00744   3081 0.0000547 0.00999 -0.825 0.0126 
##  2    0.05 0.0000464 -0.000706 10402 0.000188  0.00681  0.753 0.0197 
##  3    0.1  0.0000565  0.000282 12935 0.000201  0.00751  0.719 0.0298 
##  4    0.15 0.0000639  0.000490 11789 0.000199  0.00800  0.678 0.0308 
##  5    0.2  0.0000671  0.000651 10651 0.000201  0.00819  0.666 0.0292 
##  6    0.25 0.0000692  0.000996  9233 0.000201  0.00832  0.655 0.0261 
##  7    0.3  0.0000748  0.00125   8018 0.000201  0.00865  0.627 0.0245 
##  8    0.35 0.0000798  0.00126   6911 0.000201  0.00893  0.603 0.0225 
##  9    0.4  0.0000838  0.00166  15064 0.00138   0.00915  0.939 0.0515 
## 10    0.6  0.0000914  0.00208  16435 0.00299   0.00956  0.969 0.0614 
## 11    0.8  0.000105   0.00276  13819 0.00364   0.0102   0.971 0.0592 
## 12    1    0.000115   0.00342   9892 0.00302   0.0107   0.962 0.0465 
## 13    1.2  0.000130   0.00398   8747 0.00371   0.0114   0.965 0.0464 
## 14    1.4  0.000142   0.00442   6520 0.00298   0.0119   0.952 0.0379 
## 15    1.6  0.000160   0.00482   5911 0.00368   0.0126   0.957 0.0385 
## 16    1.8  0.000182   0.00616   4359 0.00302   0.0135   0.940 0.0324 
## 17    2    0.000214   0.00708  10158 0.0309    0.0146   0.993 0.0887 
## 18    3    0.000293   0.00969   9263 0.0802    0.0171   0.996 0.111  
## 19    4    0.000431   0.0133    4687 0.0813    0.0208   0.995 0.0824 
## 20    5    0.000604   0.0166    2502 0.0808    0.0246   0.993 0.0617 
## 21    6    0.000823   0.0204    1415 0.0777    0.0287   0.989 0.0475 
## 22    7    0.00108    0.0239     605 0.0774    0.0329   0.986 0.0267 
## 23    8    0.00135    0.0274     205 0.0672    0.0368   0.980 0.0113 
## 24    9    0.00152    0.0282      33 0.0486    0.0390   0.969 0.00205

Predictions are very accurate, as expected

A model is run to predict cloud cover, at first allowing the cloud-cover subset variables (low, mid, high) as predictors:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Full model: every training variable except the cloud-cover outcomes themselves
rfCloudFull <- runFullRF(
    dfTrain=allCity %>% filter(tt=="train", year<2022),
    yVar="cloudcover",
    xVars=varsTrain[!str_detect(varsTrain, "cloudcover$")],
    dfTest=allCity %>% filter(tt=="test", year==2022),
    useLabel=keyLabel,
    useSub=stringr::str_to_sentence(keyLabel),
    isContVar=TRUE,
    rndTo=-1L,
    refXY=TRUE,
    returnData=TRUE
)
## Growing trees.. Progress: 17%. Estimated remaining time: 2 minutes, 33 seconds.
## Growing trees.. Progress: 34%. Estimated remaining time: 1 minute, 59 seconds.
## Growing trees.. Progress: 51%. Estimated remaining time: 1 minute, 29 seconds.
## Growing trees.. Progress: 68%. Estimated remaining time: 58 seconds.
## Growing trees.. Progress: 83%. Estimated remaining time: 31 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.481% (RMSE 2.61 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'

The model is effective at predicting cloud cover. Of interest, variable importance is highest for ‘weathercode’, a categorical variable improperly run as an integer. The interpretation of WMO weather codes is available at https://www.nodc.noaa.gov/archive/arc0021/0002199/1.1/data/0-data/HTML/WMO-CODE/WMO4677.HTM

The weather code and cloud cover variables are explored:

# Distribution of 'cloudcover'
allCity %>%
    select(weathercode, cloudcover) %>%
    mutate(weathercode=as.factor(weathercode)) %>%
    ggplot(aes(x=cloudcover)) +
    geom_histogram(bins=50, aes(y=after_stat(count/sum(count)))) +
    labs(title="Distribution of cloud cover", y="Proportion of total observations")

# Distribution of 'weathercode'
allCity %>%
    select(weathercode, cloudcover) %>%
    mutate(weathercode=as.factor(weathercode)) %>%
    ggplot(aes(x=weathercode)) +
    geom_bar(aes(y=after_stat(count/sum(count)))) +
    labs(title="Distribution of weather code", y="Proportion of total observations")

# Cloud cover boxplot by weather code
allCity %>%
    select(weathercode, cloudcover) %>%
    mutate(weathercode=as.factor(weathercode)) %>%
    ggplot(aes(x=weathercode, y=cloudcover)) +
    geom_boxplot(fill="lightblue") +
    labs(title="Cloud cover by weather code", y="Cloud cover")

Weather code is strongly predictive of cloud cover, with codes 00 and 01 associated with few clouds and other codes associated with many clouds

The model is run to predict cloud cover using only weather code as a factor:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Single-predictor model: WMO weather code treated as a factor
runFullRF(
    dfTrain=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="train", year<2022),
    yVar="cloudcover",
    xVars="fct_wmo",
    dfTest=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="test", year==2022),
    useLabel=keyLabel,
    useSub=stringr::str_to_sentence(keyLabel),
    isContVar=TRUE,
    rndTo=-1L,
    refXY=TRUE,
    returnData=FALSE
)

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.458% (RMSE 9.96 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'

The model drives over 90% R-squared, with RMSE falling from ~36 in the baseline to ~10 with predictions based solely on weather code

The model is run to predict cloud cover using only the three cloud cover (low, mid, high) predictors:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Model using only the three cloud-cover sub-type predictors (low, mid, high)
runFullRF(
    dfTrain=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="train", year<2022),
    yVar="cloudcover",
    xVars=varsTrain[str_detect(varsTrain, pattern="cloudcover_")],
    dfTest=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="test", year==2022),
    useLabel=keyLabel,
    useSub=stringr::str_to_sentence(keyLabel),
    isContVar=TRUE,
    rndTo=-1L,
    refXY=TRUE,
    returnData=FALSE
)
## Growing trees.. Progress: 93%. Estimated remaining time: 2 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.129% (RMSE 3.39 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'

The model drives over 99% R-squared, with RMSE falling from ~36 in the baseline to ~3 with predictions based solely on cloud cover sub-types

All combinations of two variables are explored for predicting cloud cover on a smaller training dataset:

# Train and test data (pre-2022 rows for training, 2022 rows for testing),
# with the city source converted to a factor for use as a predictor
dfTrainCloud <- allCity %>% 
    filter(tt=="train", year<2022) %>% 
    mutate(fct_src=factor(src))
dfTestCloud <- allCity %>% 
    filter(tt=="test", year==2022) %>% 
    mutate(fct_src=factor(src))

# Variables to explore: all training variables except the cloud-cover
# outcomes, plus month and time-of-day
possCloudVars <- c(varsTrain[!str_detect(varsTrain, "cover$")], "month", "tod")

# Subsets to use: reproducible 5,000-row training subsample.
# seq_len(nrow(.)) rather than 1:nrow(.) so the index vector is empty (not
# c(1, 0)) if the training frame ever has zero rows; draws are unchanged
set.seed(24080616)
idxSmallCloud <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)
mtxSmallCloud <- matrix(nrow=0, ncol=3)

# Fit a small RF for every unordered pair of candidate predictors and record
# (idx1, idx2, test R-squared). Results accumulate in a preallocated list and
# are bound once at the end -- rbind() inside the loop copies the whole matrix
# on every iteration (O(n^2) in total work).
nPossVars <- length(possCloudVars)
lstSmallCloud <- vector("list", choose(nPossVars, 2))
pairNum <- 0L
for(idx1 in seq_len(nPossVars-1L)) {
    for(idx2 in (idx1+1L):nPossVars) {
        r2SmallCloud <- runFullRF(dfTrain=dfTrainCloud[idxSmallCloud,], 
                                  yVar="cloudcover", 
                                  xVars=possCloudVars[c(idx1, idx2)], 
                                  dfTest=dfTestCloud, 
                                  useLabel=keyLabel, 
                                  useSub=stringr::str_to_sentence(keyLabel), 
                                  isContVar=TRUE,
                                  makePlots=FALSE,
                                  returnData=TRUE
                                  )[["rfAcc"]][["r2"]]
        pairNum <- pairNum + 1L
        lstSmallCloud[[pairNum]] <- c(idx1, idx2, r2SmallCloud)
    }
}
# Same final object as the incremental-rbind version: one row per pair
mtxSmallCloud <- do.call(rbind, lstSmallCloud)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.345% (RMSE 36.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.025% (RMSE 30.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.405% (RMSE 36.53 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.724% (RMSE 36.76 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.72% (RMSE 36.94 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.649% (RMSE 35.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.495% (RMSE 31.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.401% (RMSE 32.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.944% (RMSE 35.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.722% (RMSE 21.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.433% (RMSE 26.3 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.782% (RMSE 32.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.022% (RMSE 33.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.673% (RMSE 32.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.009% (RMSE 30.78 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.063% (RMSE 34.59 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.179% (RMSE 37.02 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.091% (RMSE 36.65 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.492% (RMSE 37.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.869% (RMSE 37.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.03% (RMSE 36.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.382% (RMSE 33.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.079% (RMSE 10.21 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.003% (RMSE 32.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.298% (RMSE 36.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.628% (RMSE 36.16 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.948% (RMSE 36.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.571% (RMSE 35.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.649% (RMSE 35.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.323% (RMSE 35.85 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.287% (RMSE 36.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.867% (RMSE 35.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.672% (RMSE 36.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.325% (RMSE 37.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.705% (RMSE 36.15 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.183% (RMSE 36.31 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.577% (RMSE 32.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.12% (RMSE 33.22 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.115% (RMSE 36.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.732% (RMSE 36.77 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.539% (RMSE 35.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.049% (RMSE 31.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.706% (RMSE 31.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.775% (RMSE 35.21 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.152% (RMSE 21.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.784% (RMSE 26.21 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.941% (RMSE 31.64 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.59% (RMSE 35.62 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.587% (RMSE 35.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.386% (RMSE 33.37 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.564% (RMSE 34.69 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.489% (RMSE 37.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.003% (RMSE 37.35 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.142% (RMSE 37.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.268% (RMSE 37.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.09% (RMSE 37.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.762% (RMSE 36.14 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.533% (RMSE 9.22 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.541% (RMSE 32.94 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.541% (RMSE 37.62 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.843% (RMSE 36.43 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.273% (RMSE 36.5 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.834% (RMSE 36.61 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.293% (RMSE 35.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.787% (RMSE 36.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.527% (RMSE 36.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.007% (RMSE 36.09 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.155% (RMSE 36.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.982% (RMSE 37.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.157% (RMSE 35.51 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.055% (RMSE 35.53 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.849% (RMSE 32.88 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.019% (RMSE 32.84 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.093% (RMSE 32.63 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.267% (RMSE 32.39 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.348% (RMSE 29.39 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.912% (RMSE 29.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.237% (RMSE 31.78 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.126% (RMSE 21.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.464% (RMSE 23.38 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.647% (RMSE 28.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.742% (RMSE 31.47 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.515% (RMSE 31.72 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.541% (RMSE 30.66 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.347% (RMSE 30.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.086% (RMSE 31.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.758% (RMSE 32.09 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.576% (RMSE 33.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.681% (RMSE 33.31 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.356% (RMSE 30.92 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.242% (RMSE 31.36 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.148% (RMSE 9.5 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.576% (RMSE 32.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.743% (RMSE 32.9 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.306% (RMSE 32.38 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.493% (RMSE 32.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.841% (RMSE 32.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.275% (RMSE 32.79 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.518% (RMSE 32.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.731% (RMSE 32.9 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.197% (RMSE 32.81 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.85% (RMSE 32.07 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.915% (RMSE 32.46 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.717% (RMSE 31.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.374% (RMSE 31.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.137% (RMSE 32.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.88% (RMSE 37.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.174% (RMSE 36.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.033% (RMSE 31.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.253% (RMSE 32.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.317% (RMSE 34.92 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.043% (RMSE 21.45 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.864% (RMSE 26.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.498% (RMSE 32.14 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.971% (RMSE 35.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.283% (RMSE 34.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.339% (RMSE 33.38 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.123% (RMSE 35.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.242% (RMSE 37.21 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.438% (RMSE 36.89 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.788% (RMSE 38.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.494% (RMSE 37.96 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.133% (RMSE 37.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.069% (RMSE 35.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.105% (RMSE 9.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.421% (RMSE 32.96 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.061% (RMSE 33.63 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.767% (RMSE 32.89 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.838% (RMSE 33.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.325% (RMSE 35.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.654% (RMSE 35.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.303% (RMSE 36.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.051% (RMSE 36.64 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.524% (RMSE 36.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.981% (RMSE 36.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.638% (RMSE 35.79 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.424% (RMSE 34.14 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.443% (RMSE 35.64 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.391% (RMSE 37.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.779% (RMSE 35.58 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.118% (RMSE 31.39 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.88% (RMSE 31.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.895% (RMSE 35.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.925% (RMSE 21.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.383% (RMSE 26.31 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.334% (RMSE 31.97 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.086% (RMSE 36.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.262% (RMSE 35.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.784% (RMSE 33.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.038% (RMSE 35.16 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.721% (RMSE 37.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.346% (RMSE 37.76 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.729% (RMSE 37.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.502% (RMSE 38.13 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.063% (RMSE 37.71 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.864% (RMSE 36.43 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.571% (RMSE 9.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.525% (RMSE 33.14 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.154% (RMSE 37.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.293% (RMSE 36.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.274% (RMSE 36.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.022% (RMSE 37.17 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.704% (RMSE 35.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.91% (RMSE 36.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.228% (RMSE 37.03 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.442% (RMSE 36.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.36% (RMSE 36.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.43% (RMSE 37.77 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.451% (RMSE 36.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.937% (RMSE 35.92 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.668% (RMSE 35.97 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.089% (RMSE 32.02 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.995% (RMSE 32.65 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.168% (RMSE 35.69 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.69% (RMSE 21.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.385% (RMSE 26.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.373% (RMSE 32.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.424% (RMSE 36.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.296% (RMSE 35.67 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.94% (RMSE 34.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.467% (RMSE 35.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.152% (RMSE 38.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.713% (RMSE 38.17 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.065% (RMSE 38.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.547% (RMSE 38.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.053% (RMSE 38.05 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.948% (RMSE 36.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.214% (RMSE 10.12 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.965% (RMSE 33.25 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.381% (RMSE 37.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.005% (RMSE 36.46 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.805% (RMSE 36.6 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.579% (RMSE 36.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.617% (RMSE 35.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.664% (RMSE 36.39 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.029% (RMSE 36.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.446% (RMSE 36.35 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.712% (RMSE 36.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.39% (RMSE 37.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.515% (RMSE 35.45 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.163% (RMSE 36.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.917% (RMSE 30.8 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.416% (RMSE 31.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.124% (RMSE 34.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.107% (RMSE 21.12 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.112% (RMSE 25.88 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.623% (RMSE 31.28 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.417% (RMSE 34.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.068% (RMSE 34.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.017% (RMSE 32.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.921% (RMSE 34.43 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.893% (RMSE 36.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.6% (RMSE 36.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.053% (RMSE 36.46 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.4% (RMSE 36.53 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.362% (RMSE 36.21 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.914% (RMSE 34.81 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.537% (RMSE 9.91 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.663% (RMSE 33.11 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.924% (RMSE 35.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.303% (RMSE 35.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.264% (RMSE 35.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.461% (RMSE 35.64 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.661% (RMSE 34.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.264% (RMSE 35.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.776% (RMSE 36.95 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.148% (RMSE 36.25 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.658% (RMSE 34.67 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.734% (RMSE 35.77 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.789% (RMSE 34.07 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.124% (RMSE 34.58 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.46% (RMSE 31.94 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.459% (RMSE 32.15 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.619% (RMSE 21.27 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.35% (RMSE 25.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.065% (RMSE 28.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.433% (RMSE 31.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.386% (RMSE 30.91 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.549% (RMSE 30.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.782% (RMSE 30.61 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.507% (RMSE 31.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.332% (RMSE 31.76 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.759% (RMSE 31.88 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.63% (RMSE 31.91 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.737% (RMSE 31.88 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.723% (RMSE 31.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.134% (RMSE 14.45 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.033% (RMSE 29.46 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.895% (RMSE 31.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.897% (RMSE 31.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.309% (RMSE 30.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.299% (RMSE 30.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.568% (RMSE 30.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.024% (RMSE 30.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.336% (RMSE 31.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.682% (RMSE 30.85 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.826% (RMSE 31.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.307% (RMSE 31.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.61% (RMSE 31.5 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.186% (RMSE 32.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.004% (RMSE 32.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.055% (RMSE 21.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.689% (RMSE 25.98 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.182% (RMSE 28.98 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.527% (RMSE 31.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.393% (RMSE 31.54 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.911% (RMSE 30.58 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.381% (RMSE 31.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.763% (RMSE 32.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.521% (RMSE 32.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.638% (RMSE 32.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.421% (RMSE 32.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.854% (RMSE 32.47 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.73% (RMSE 31.89 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.655% (RMSE 14.67 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.076% (RMSE 29.9 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.339% (RMSE 31.76 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.968% (RMSE 31.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.552% (RMSE 31.3 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.185% (RMSE 31.38 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.044% (RMSE 31.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.558% (RMSE 31.51 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.583% (RMSE 31.92 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.288% (RMSE 31.35 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.681% (RMSE 32.51 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.321% (RMSE 32.18 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.595% (RMSE 32.12 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.418% (RMSE 32.76 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.065% (RMSE 22.63 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.299% (RMSE 26.83 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.869% (RMSE 31.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.284% (RMSE 34.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.13% (RMSE 34.58 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.347% (RMSE 33.37 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.206% (RMSE 34.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.941% (RMSE 35.37 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.258% (RMSE 35.31 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.234% (RMSE 35.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.683% (RMSE 35.78 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.479% (RMSE 35.45 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.972% (RMSE 34.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.41% (RMSE 15.64 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.122% (RMSE 32.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.791% (RMSE 35.21 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.479% (RMSE 35.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.191% (RMSE 34.95 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.026% (RMSE 34.79 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.542% (RMSE 33.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.684% (RMSE 34.28 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.454% (RMSE 34.71 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.867% (RMSE 34.05 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.864% (RMSE 35.75 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.12% (RMSE 35.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.164% (RMSE 35.51 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.547% (RMSE 35.81 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.622% (RMSE 11.11 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.417% (RMSE 14.77 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.438% (RMSE 21.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.548% (RMSE 21.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.343% (RMSE 20.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.97% (RMSE 20.85 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.19% (RMSE 21.71 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.564% (RMSE 21.59 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.074% (RMSE 21.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.121% (RMSE 21.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.69% (RMSE 21.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.155% (RMSE 21.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.832% (RMSE 8.25 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.076% (RMSE 21.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.095% (RMSE 21.43 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.205% (RMSE 21.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.067% (RMSE 21.13 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.892% (RMSE 21.18 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.219% (RMSE 21.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.911% (RMSE 21.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.505% (RMSE 21.61 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.299% (RMSE 21.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.549% (RMSE 21.6 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.251% (RMSE 21.69 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.155% (RMSE 21.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.366% (RMSE 23.12 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.104% (RMSE 24.31 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.303% (RMSE 26.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.125% (RMSE 25.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.838% (RMSE 25.17 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.59% (RMSE 25.5 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.685% (RMSE 26.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.623% (RMSE 26.5 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.696% (RMSE 26.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.457% (RMSE 26.54 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.215% (RMSE 26.6 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.184% (RMSE 25.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.707% (RMSE 9.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.103% (RMSE 24.03 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.539% (RMSE 26.02 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.733% (RMSE 25.97 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.19% (RMSE 26.11 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.445% (RMSE 26.3 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.81% (RMSE 25.7 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.552% (RMSE 26.02 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.77% (RMSE 26.22 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.663% (RMSE 26.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.71% (RMSE 26.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.704% (RMSE 26.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.132% (RMSE 25.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.548% (RMSE 27.25 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.286% (RMSE 31.35 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.167% (RMSE 30.96 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.937% (RMSE 29.71 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.806% (RMSE 31.03 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.629% (RMSE 32.11 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.711% (RMSE 32.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.685% (RMSE 32.51 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.762% (RMSE 32.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.256% (RMSE 32.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.391% (RMSE 31.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.434% (RMSE 9.98 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.405% (RMSE 28.47 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.373% (RMSE 31.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.909% (RMSE 31.43 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.304% (RMSE 31.35 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.248% (RMSE 31.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.466% (RMSE 30.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.576% (RMSE 31.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.972% (RMSE 31.63 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.651% (RMSE 31.28 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.506% (RMSE 31.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.609% (RMSE 32.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.744% (RMSE 31.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.435% (RMSE 31.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.41% (RMSE 30.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.781% (RMSE 30.18 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.292% (RMSE 30.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.442% (RMSE 35.83 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.873% (RMSE 35.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.499% (RMSE 36.36 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.628% (RMSE 36.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.846% (RMSE 35.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.638% (RMSE 34.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.649% (RMSE 9.83 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.867% (RMSE 31.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.909% (RMSE 35.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.021% (RMSE 35.35 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.405% (RMSE 35.09 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.317% (RMSE 35.11 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.253% (RMSE 34.17 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.287% (RMSE 34.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.205% (RMSE 35.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.046% (RMSE 34.78 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.91% (RMSE 35.37 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.056% (RMSE 35.72 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.067% (RMSE 34.97 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.692% (RMSE 35.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.54% (RMSE 31.51 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.399% (RMSE 30.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.159% (RMSE 35.14 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.355% (RMSE 35.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.497% (RMSE 35.63 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.164% (RMSE 35.88 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.242% (RMSE 34.75 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.724% (RMSE 34.47 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.197% (RMSE 10.13 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.95% (RMSE 32.05 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.511% (RMSE 35.07 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.329% (RMSE 34.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.628% (RMSE 34.67 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.23% (RMSE 34.37 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.07% (RMSE 33.63 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.91% (RMSE 34.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.826% (RMSE 34.83 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.052% (RMSE 34.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.663% (RMSE 35.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.974% (RMSE 35.17 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.146% (RMSE 34.58 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.85% (RMSE 34.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.864% (RMSE 30.16 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.903% (RMSE 33.46 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.401% (RMSE 33.76 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.875% (RMSE 33.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.997% (RMSE 34.03 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.873% (RMSE 33.07 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.83% (RMSE 32.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.199% (RMSE 10.13 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.233% (RMSE 30.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.879% (RMSE 33.47 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.239% (RMSE 33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.728% (RMSE 32.9 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.744% (RMSE 32.7 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.789% (RMSE 32.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.702% (RMSE 32.71 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.267% (RMSE 33.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.789% (RMSE 32.69 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.678% (RMSE 33.51 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.529% (RMSE 33.54 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.612% (RMSE 33.12 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.134% (RMSE 32.62 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.849% (RMSE 35.75 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.571% (RMSE 35.8 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.303% (RMSE 36.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.682% (RMSE 36.15 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.83% (RMSE 35.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.277% (RMSE 30.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.566% (RMSE 9.89 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.533% (RMSE 30.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.679% (RMSE 34.85 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.078% (RMSE 34.59 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.954% (RMSE 34.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.934% (RMSE 34.81 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.435% (RMSE 33.94 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.743% (RMSE 34.84 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.651% (RMSE 35.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.62% (RMSE 35.05 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.931% (RMSE 35.37 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.24% (RMSE 35.31 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.461% (RMSE 34.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.719% (RMSE 35.03 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.302% (RMSE 38.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.129% (RMSE 38.58 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.065% (RMSE 38.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.262% (RMSE 37.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.187% (RMSE 35.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.116% (RMSE 10.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.046% (RMSE 33.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.946% (RMSE 37.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.093% (RMSE 37.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.901% (RMSE 36.97 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.571% (RMSE 37.09 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.588% (RMSE 35.62 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.58% (RMSE 36.56 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.524% (RMSE 36.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.142% (RMSE 36.48 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.405% (RMSE 36.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.484% (RMSE 37.96 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.196% (RMSE 36.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.554% (RMSE 35.81 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.957% (RMSE 38.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.897% (RMSE 38.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.768% (RMSE 37.66 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.398% (RMSE 35.65 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.298% (RMSE 10.07 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.977% (RMSE 33.64 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.85% (RMSE 37.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.655% (RMSE 37.11 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.251% (RMSE 37.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.012% (RMSE 36.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.459% (RMSE 35.64 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.109% (RMSE 36.47 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.571% (RMSE 36.92 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.514% (RMSE 36.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.889% (RMSE 36.11 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.453% (RMSE 37.78 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.545% (RMSE 35.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.973% (RMSE 35.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.9% (RMSE 38.54 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.175% (RMSE 38.07 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.914% (RMSE 36.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.314% (RMSE 10.06 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.941% (RMSE 33.85 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.181% (RMSE 37.9 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.793% (RMSE 37.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.034% (RMSE 37 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.377% (RMSE 37.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.798% (RMSE 36.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.484% (RMSE 37.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.776% (RMSE 36.95 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.416% (RMSE 37.59 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.544% (RMSE 36.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.41% (RMSE 38.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.139% (RMSE 36.3 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.384% (RMSE 36.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.264% (RMSE 38.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.843% (RMSE 36.61 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.323% (RMSE 10.05 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.094% (RMSE 33.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.62% (RMSE 37.98 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.249% (RMSE 37.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.593% (RMSE 37.1 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.289% (RMSE 37.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.39% (RMSE 36.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.909% (RMSE 36.98 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.642% (RMSE 37.28 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.192% (RMSE 37.73 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.14% (RMSE 36.66 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.995% (RMSE 38.39 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.374% (RMSE 36.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.267% (RMSE 36.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.198% (RMSE 34.76 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.121% (RMSE 10.18 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.226% (RMSE 32.6 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.165% (RMSE 37.2 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.906% (RMSE 37.15 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.08% (RMSE 37.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.404% (RMSE 36.89 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.468% (RMSE 35.45 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.044% (RMSE 36.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.911% (RMSE 36.8 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.329% (RMSE 36.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.021% (RMSE 36.27 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.461% (RMSE 37.6 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.707% (RMSE 35.96 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.878% (RMSE 35.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.91% (RMSE 9.66 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.253% (RMSE 31.98 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.755% (RMSE 35.95 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.21% (RMSE 35.69 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.657% (RMSE 35.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.49% (RMSE 35.45 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.381% (RMSE 34.72 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.461% (RMSE 35.46 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.434% (RMSE 35.65 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.717% (RMSE 35.22 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.947% (RMSE 35.74 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.692% (RMSE 36.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.693% (RMSE 35.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.178% (RMSE 34.38 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.676% (RMSE 9.12 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.626% (RMSE 9.16 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.199% (RMSE 9.46 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.252% (RMSE 9.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.56% (RMSE 9.89 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.115% (RMSE 10.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.504% (RMSE 9.93 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.142% (RMSE 10.17 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.759% (RMSE 10.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.184% (RMSE 10.77 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.919% (RMSE 9.65 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.547% (RMSE 9.9 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.157% (RMSE 14.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.545% (RMSE 33.34 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.727% (RMSE 33.5 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.176% (RMSE 33.6 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.054% (RMSE 33.63 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.266% (RMSE 33.39 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.043% (RMSE 33.43 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.686% (RMSE 33.7 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.839% (RMSE 33.08 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.93% (RMSE 32.66 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.362% (RMSE 33.57 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.546% (RMSE 32.33 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.245% (RMSE 31.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.568% (RMSE 35.81 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.911% (RMSE 36.98 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.585% (RMSE 36.38 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.523% (RMSE 35.44 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.259% (RMSE 36.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.9% (RMSE 36.8 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.146% (RMSE 36.3 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.999% (RMSE 36.09 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.422% (RMSE 37.77 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.115% (RMSE 35.7 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.706% (RMSE 35.6 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.131% (RMSE 37.02 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.261% (RMSE 36.5 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.958% (RMSE 35.55 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.319% (RMSE 36.03 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.295% (RMSE 37.04 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.033% (RMSE 35.72 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.227% (RMSE 35.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.432% (RMSE 36.89 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.13% (RMSE 35.14 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.853% (RMSE 35.38 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.557% (RMSE 37.27 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.528% (RMSE 35.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.405% (RMSE 36.53 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.695% (RMSE 36.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.481% (RMSE 35.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.291% (RMSE 35.67 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.917% (RMSE 37.86 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.189% (RMSE 35.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.861% (RMSE 35.19 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.505% (RMSE 35.07 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.339% (RMSE 37.23 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.108% (RMSE 35.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.264% (RMSE 36.32 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.087% (RMSE 35.71 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.386% (RMSE 37.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.205% (RMSE 35.13 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.412% (RMSE 35.09 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.513% (RMSE 35.82 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.242% (RMSE 35.68 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.25% (RMSE 35.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.04% (RMSE 34.21 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.124% (RMSE 36.25 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.789% (RMSE 34.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.013% (RMSE 34.22 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.171% (RMSE 36.49 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.085% (RMSE 36.26 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.571% (RMSE 34.87 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.43% (RMSE 37.42 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.97% (RMSE 35.17 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.231% (RMSE 34.75 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.335% (RMSE 36.52 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.379% (RMSE 35.28 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.655% (RMSE 37.29 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.694% (RMSE 35.41 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.079% (RMSE 35.15 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.766% (RMSE 34.65 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.177% (RMSE 36.24 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.701% (RMSE 34.47 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.906% (RMSE 34.43 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.711% (RMSE 36.4 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.566% (RMSE 35.99 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.181% (RMSE 36.31 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.122% (RMSE 36.25 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.445% (RMSE 36.01 vs. 36.27 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.458% (RMSE 36.01 vs. 36.27 null)

Predictive success by metric is explored:

# Tidy the pairwise R-squared results matrix, mapping each variable index
# back to its name so combinations are readable
dfSmallR2Cloud <- mtxSmallCloud %>% 
    as.data.frame() %>% 
    purrr::set_names(c("idx1", "idx2", "r2")) %>% 
    tibble::as_tibble() %>% 
    mutate(var1=possCloudVars[idx1], 
           var2=possCloudVars[idx2], 
           rn=row_number()
           ) 

# Show the 20 strongest two-variable combinations
dfSmallR2Cloud %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 666 × 3
##    var1                       var2                             r2
##    <chr>                      <chr>                         <dbl>
##  1 cloudcover_low             weathercode                   0.948
##  2 cloudcover_mid             weathercode                   0.937
##  3 weathercode                vapor_pressure_deficit        0.937
##  4 weathercode                soil_temperature_0_to_7cm     0.936
##  5 apparent_temperature       weathercode                   0.936
##  6 temperature_2m             weathercode                   0.935
##  7 weathercode                soil_temperature_28_to_100cm  0.933
##  8 weathercode                soil_temperature_7_to_28cm    0.932
##  9 relativehumidity_2m        weathercode                   0.931
## 10 dewpoint_2m                weathercode                   0.931
## 11 weathercode                doy                           0.929
## 12 et0_fao_evapotranspiration weathercode                   0.929
## 13 shortwave_radiation        weathercode                   0.926
## 14 diffuse_radiation          weathercode                   0.926
## 15 weathercode                soil_temperature_100_to_255cm 0.926
## 16 weathercode                month                         0.925
## 17 surface_pressure           weathercode                   0.925
## 18 weathercode                soil_moisture_7_to_28cm       0.925
## 19 cloudcover_high            weathercode                   0.924
## 20 winddirection_100m         weathercode                   0.923
## # ℹ 646 more rows
# For each variable, summarize the min / mean / max holdout R-squared across
# every pair it appears in, then plot those ranges sorted by mean R-squared
dfSmallR2Cloud %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    coord_flip() + # horizontal layout keeps variable names readable
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    lims(y=c(NA, 1)) + 
    geom_hline(yintercept=1, lty=2, color="red") +
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting cloud cover", 
         y="Range of R-squared (min-mean-max)", 
         x=NULL
    )

# Top pairs after dropping any combination that includes 'weathercode'
# (filtering before sorting; dplyr::arrange is stable, so the result matches)
dfSmallR2Cloud %>% 
    filter(var1!="weathercode", var2!="weathercode") %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 630 × 3
##    var1                var2                             r2
##    <chr>               <chr>                         <dbl>
##  1 cloudcover_low      cloudcover_mid                0.906
##  2 cloudcover_low      cloudcover_high               0.834
##  3 cloudcover_low      direct_normal_irradiance      0.673
##  4 cloudcover_low      diffuse_radiation             0.670
##  5 cloudcover_low      soil_moisture_100_to_255cm    0.663
##  6 cloudcover_low      soil_moisture_0_to_7cm        0.662
##  7 surface_pressure    cloudcover_low                0.661
##  8 cloudcover_low      soil_temperature_28_to_100cm  0.661
##  9 cloudcover_low      soil_temperature_100_to_255cm 0.659
## 10 precipitation       cloudcover_low                0.656
## 11 cloudcover_low      direct_radiation              0.655
## 12 cloudcover_low      shortwave_radiation           0.654
## 13 cloudcover_low      soil_temperature_7_to_28cm    0.652
## 14 cloudcover_low      month                         0.652
## 15 cloudcover_low      et0_fao_evapotranspiration    0.652
## 16 temperature_2m      cloudcover_low                0.652
## 17 relativehumidity_2m cloudcover_low                0.651
## 18 cloudcover_low      soil_temperature_0_to_7cm     0.651
## 19 cloudcover_low      vapor_pressure_deficit        0.651
## 20 rain                cloudcover_low                0.651
## # ℹ 610 more rows
# Repeat the per-variable min/mean/max R-squared plot, this time excluding
# any pair involving 'weathercode' (which dominated the earlier ranking)
dfSmallR2Cloud %>% 
    filter(var2!="weathercode", var1!="weathercode") %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    coord_flip() + # horizontal layout keeps variable names readable
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    lims(y=c(NA, 1)) + 
    geom_hline(yintercept=1, lty=2, color="red") +
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting cloud cover (excluding variable paired with 'weathercode')", 
         y="Range of R-squared (min-mean-max)", 
         x=NULL
    )

Select combinations are explored using the full training dataset, with mtry=3:

# Candidate predictors for the larger models: weather code plus the three
# cloud cover subtype variables (low / mid / high bands)
possLargeVars <- c("weathercode", 
                   "cloudcover_low", 
                   "cloudcover_mid", 
                   "cloudcover_high"
                   )
possLargeVars
## [1] "weathercode"     "cloudcover_low"  "cloudcover_mid"  "cloudcover_high"
# Evaluate every 3-variable combination of possLargeVars as predictors of
# overall cloud cover on the full training dataset, with mtry=3.
# The results matrix is preallocated (one row per combination) and filled by
# a row counter, instead of being grown with rbind() on every iteration;
# the iteration order and resulting matrix contents are unchanged.
mtxLargeCloud <- matrix(nrow=choose(length(possLargeVars), 3), ncol=4)
iRowLarge <- 0L

for(idx1 in 1:(length(possLargeVars)-2)) {
    for(idx2 in (idx1+1):(length(possLargeVars)-1)) {
        for(idx3 in (idx2+1):(length(possLargeVars))) {
            # Holdout R-squared for this trio of predictors
            r2LargeCloud <- runFullRF(dfTrain=dfTrainCloud[,], 
                                      yVar="cloudcover", 
                                      xVars=possLargeVars[c(idx1, idx2, idx3)], 
                                      dfTest=dfTestCloud, 
                                      useLabel=keyLabel, 
                                      useSub=stringr::str_to_sentence(keyLabel), 
                                      isContVar=TRUE,
                                      mtry=3,
                                      makePlots=FALSE,
                                      returnData=TRUE
                                      )[["rfAcc"]][["r2"]]
            iRowLarge <- iRowLarge + 1L
            mtxLargeCloud[iRowLarge, ] <- c(idx1, idx2, idx3, r2LargeCloud)
        }
    }
}
## Growing trees.. Progress: 49%. Estimated remaining time: 31 seconds.
## Growing trees.. Progress: 98%. Estimated remaining time: 1 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.104% (RMSE 5 vs. 36.27 null)
## Growing trees.. Progress: 52%. Estimated remaining time: 28 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.085% (RMSE 6.19 vs. 36.27 null)
## Growing trees.. Progress: 43%. Estimated remaining time: 41 seconds.
## Growing trees.. Progress: 84%. Estimated remaining time: 11 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.218% (RMSE 7.93 vs. 36.27 null)
## Growing trees.. Progress: 43%. Estimated remaining time: 41 seconds.
## Growing trees.. Progress: 86%. Estimated remaining time: 9 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.17 vs. 36.27 null)
# Tidy the 3-variable results matrix and list all combinations, best first
dfLargeR2Cloud <- mtxLargeCloud %>% 
    as.data.frame() %>% 
    purrr::set_names(c("idx1", "idx2", "idx3", "r2")) %>% 
    tibble::as_tibble() %>% 
    mutate(var1=possLargeVars[idx1], 
           var2=possLargeVars[idx2], 
           var3=possLargeVars[idx3], 
           rn=row_number()
           ) 

dfLargeR2Cloud %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, var3, r2) %>% 
    print(n=20)
## # A tibble: 4 × 4
##   var1           var2           var3               r2
##   <chr>          <chr>          <chr>           <dbl>
## 1 cloudcover_low cloudcover_mid cloudcover_high 1.00 
## 2 weathercode    cloudcover_low cloudcover_mid  0.981
## 3 weathercode    cloudcover_low cloudcover_high 0.971
## 4 weathercode    cloudcover_mid cloudcover_high 0.952

The three cloud cover subtypes, in combination, have almost perfect predictive power for overall cloud cover.

A linear model is run for comparison:

# Linear-model benchmark: predict overall cloud cover from the three subtype
# variables; c~l*m*h expands to all main effects plus all interaction terms
lmMiniCloud <- allCity %>% 
    filter(tt=="train", year<2022) %>%
    select(c=cloudcover, l=cloudcover_low, m=cloudcover_mid, h=cloudcover_high) %>%
    lm(c~l*m*h, data=.) 
summary(lmMiniCloud)
## 
## Call:
## lm(formula = c ~ l * m * h, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.6112 -0.8934  0.0381  0.1074 19.3437 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.074e-01  7.397e-03  -14.52   <2e-16 ***
## l            9.280e-01  2.442e-04 3799.73   <2e-16 ***
## m            6.507e-01  3.641e-04 1787.35   <2e-16 ***
## h            3.093e-01  1.734e-04 1783.49   <2e-16 ***
## l:m         -4.984e-03  6.316e-06 -789.05   <2e-16 ***
## l:h         -1.401e-03  5.068e-06 -276.49   <2e-16 ***
## m:h         -1.996e-04  5.159e-06  -38.69   <2e-16 ***
## l:m:h       -2.158e-05  9.538e-08 -226.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.927 on 368102 degrees of freedom
## Multiple R-squared:  0.9935, Adjusted R-squared:  0.9935 
## F-statistic: 8.006e+06 on 7 and 368102 DF,  p-value: < 2.2e-16
# Score the linear model on the 2022 holdout data, then summarize errors by
# actual cloud cover rounded to the nearest 5.
# NOTE: because '.' appears as a top-level named argument (pred=.), magrittr
# does NOT also insert the piped predictions as mutate()'s first argument;
# the data argument is the explicitly supplied one-column selection below.
ggMiniCloud <- predict(lmMiniCloud, 
                      newdata=allCity %>% 
                          filter(tt=="test", year==2022) %>% 
                          select(c=cloudcover, l=cloudcover_low, m=cloudcover_mid, h=cloudcover_high)
                      ) %>% 
    mutate(select(allCity %>% filter(tt=="test", year==2022), cloudcover), 
           pred=., # '.' is the vector of holdout predictions piped in above
           err=pred-cloudcover, 
           err2=err**2, 
           rnd5=round(cloudcover/5)*5
           ) %>% 
    group_by(rnd5) %>% 
    summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniCloud %>% print(n=25)
## # A tibble: 21 × 6
##     rnd5     n cloudcover     pred     err    err2
##    <dbl> <dbl>      <dbl>    <dbl>   <dbl>   <dbl>
##  1     0  4697      0.208   0.0977 -0.111   0.0255
##  2     5   820      4.68    4.78    0.0924  0.119 
##  3    10   590      9.78   10.1     0.296   0.214 
##  4    15   483     14.9    15.4     0.440   0.393 
##  5    20   437     20.2    20.8     0.619   0.672 
##  6    25   429     25.1    25.8     0.712   0.846 
##  7    30  1073     30.0    30.7     0.754   0.791 
##  8    35   354     35.0    35.8     0.847   1.53  
##  9    40   300     40.2    40.9     0.766   2.15  
## 10    45   236     44.9    45.3     0.457   2.94  
## 11    50   219     50.0    50.3     0.254   4.63  
## 12    55   195     54.9    55.3     0.408   6.77  
## 13    60   250     59.9    61.3     1.37   13.4   
## 14    65   184     64.9    64.9     0.0396 11.5   
## 15    70   134     69.8    69.0    -0.810  15.0   
## 16    75   148     75.0    74.3    -0.681  19.3   
## 17    80   156     79.9    79.3    -0.654  24.4   
## 18    85   146     85.0    84.7    -0.227  32.4   
## 19    90   710     90.0    91.9     1.88   16.7   
## 20    95   155     94.7    90.8    -3.91   43.8   
## 21   100  1413     99.9   101.      0.984  37.4
# Plot actual vs. predicted mean cloud cover by rounded actual value;
# the dashed 45-degree line marks perfect calibration
ggMiniCloud %>% 
    select(rnd5, cloudcover, pred) %>%
    pivot_longer(cols=-c(rnd5)) %>%
    ggplot(aes(x=rnd5, y=value)) + 
    geom_line(aes(group=name, 
                  color=c("pred"="Predicted Mean", "cloudcover"="Actual Mean")[name]
                  )
              ) + 
    labs(title="Actual vs. Predicted Cloud Cover Using Linear Model on Holdout Data", 
         x="Actual cloud cover (rounded to nearest 5)", 
         y="Average cloud cover for metric"
         ) + 
    scale_color_discrete("Metric") + 
    geom_abline(slope=1, intercept=0, lty=2)

The linear model generally makes strong predictions, though with generally lower accuracy on cloudier days. Distribution of errors is explored:

# Histogram of linear-model holdout errors, faceted by cloudiness category.
# NOTE: '.' as a top-level named argument (pred=.) suppresses magrittr's
# usual first-argument insertion into mutate().
predict(lmMiniCloud, 
        newdata=allCity %>% 
            filter(tt=="test", year==2022) %>% 
            select(c=cloudcover, l=cloudcover_low, m=cloudcover_mid, h=cloudcover_high)
        ) %>% 
    mutate(select(allCity %>% filter(tt=="test", year==2022), cloudcover), 
           pred=., # '.' is the vector of holdout predictions
           err=pred-cloudcover, 
           err2=err**2, 
           rnd5=round(cloudcover/5)*5, 
           rndCat=case_when(cloudcover<10~"1) clear (<10)", 
                            cloudcover<50~"2) partly (10-50)", 
                            cloudcover<90~"3) mostly (50-90)", 
                            TRUE~"4) cloudy (>90)"
                            )
           ) %>%
    ggplot(aes(x=err)) + 
    geom_histogram(fill="lightblue") + 
    labs(title="Errors in linear model cloud cover prediction by amount of clouds", 
         x="Error (Predicted minus Actual)", 
         y="# Observations"
         ) + 
    facet_wrap(~rndCat, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Predictions for the random forest model are also explored:

# Random forest on the full training data using only the cloud cover subtype
# variables; keep the holdout prediction table for error analysis below
rfSubCloudPred <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                            yVar="cloudcover", 
                            xVars=c(varsTrain[str_detect(varsTrain, pattern="cloudcover_")]), 
                            dfTest=allCity %>% filter(tt=="test", year==2022), 
                            useLabel=keyLabel, 
                            useSub=stringr::str_to_sentence(keyLabel), 
                            isContVar=TRUE,
                            mtry=3,
                            rndTo=-1L,
                            refXY=TRUE,
                            returnData=TRUE
                            )[["tstPred"]] # holdout predictions element
## Growing trees.. Progress: 46%. Estimated remaining time: 36 seconds.
## Growing trees.. Progress: 98%. Estimated remaining time: 1 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.17 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'

# Inspect the holdout prediction table returned by the random forest run
rfSubCloudPred
## # A tibble: 13,129 × 84
##    src   time                date        hour temperature_2m relativehumidity_2m
##    <chr> <dttm>              <date>     <int>          <dbl>               <int>
##  1 NYC   2022-01-01 00:00:00 2022-01-01     0            9.2                  97
##  2 NYC   2022-01-01 01:00:00 2022-01-01     1            8.9                  98
##  3 NYC   2022-01-01 10:00:00 2022-01-01    10            9.8                  98
##  4 NYC   2022-01-01 11:00:00 2022-01-01    11           10.2                  99
##  5 NYC   2022-01-02 00:00:00 2022-01-02     0            9.7                  99
##  6 NYC   2022-01-02 02:00:00 2022-01-02     2            9.7                  97
##  7 NYC   2022-01-02 03:00:00 2022-01-02     3            9.7                 100
##  8 NYC   2022-01-02 05:00:00 2022-01-02     5            9.7                  99
##  9 NYC   2022-01-02 12:00:00 2022-01-02    12           12.5                  92
## 10 NYC   2022-01-02 16:00:00 2022-01-02    16           12.4                  90
## # ℹ 13,119 more rows
## # ℹ 78 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …

Distribution of errors from the random forest model is explored:

# Histogram of RANDOM FOREST holdout errors, faceted by cloudiness category.
# Title corrected: the original said "linear model", but this plot is built
# from rfSubCloudPred (the random forest predictions; see the near-identical
# linear-model plot earlier in the document).
rfSubCloudPred %>% 
    mutate(err=pred-cloudcover, 
           err2=err**2, 
           rnd5=round(cloudcover/5)*5, 
           rndCat=case_when(cloudcover<10~"1) clear (<10)", 
                            cloudcover<50~"2) partly (10-50)", 
                            cloudcover<90~"3) mostly (50-90)", 
                            TRUE~"4) cloudy (>90)"
                            )
           ) %>%
    ggplot(aes(x=err)) + 
    geom_histogram(fill="lightblue") + 
    labs(title="Errors in random forest cloud cover prediction by amount of clouds", 
         x="Error (Predicted minus Actual)", 
         y="# Observations"
         ) + 
    facet_wrap(~rndCat, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

There are essentially no prediction errors at any level of overall cloudiness

The impact of varying mtry is also explored:

# Compare holdout R-squared across mtry values 1-3 for the subtype-only
# random forest. vapply() replaces sapply() so the return type (exactly one
# double per mtry value) is guaranteed rather than inferred; the printed
# numeric vector is identical.
vapply(1:3, FUN.VALUE=numeric(1), FUN=function(mt) { 
    runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
              yVar="cloudcover", 
              xVars=c(varsTrain[str_detect(varsTrain, pattern="cloudcover_")]), 
              dfTest=allCity %>% filter(tt=="test", year==2022), 
              useLabel=keyLabel, 
              useSub=stringr::str_to_sentence(keyLabel), 
              isContVar=TRUE,
              mtry=mt,
              rndTo=-1L,
              refXY=TRUE,
              makePlots=FALSE,
              returnData=TRUE
              )[["rfAcc"]][["r2"]]
    }
    )
## Growing trees.. Progress: 98%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.115% (RMSE 3.41 vs. 36.27 null)
## Growing trees.. Progress: 59%. Estimated remaining time: 21 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.15 vs. 36.27 null)
## Growing trees.. Progress: 35%. Estimated remaining time: 58 seconds.
## Growing trees.. Progress: 70%. Estimated remaining time: 26 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.17 vs. 36.27 null)
## [1] 0.9911463 0.9999822 0.9999787

With mtry=1 (single variable per tree), R-squared on the test data is slightly over 99%. With mtry=2 or mtry=3, R-squared on the test data is almost exactly 100%

A model is run to predict rain, at first allowing precipitation and snowfall as predictors:

# Random forest predicting rain from all other training variables; the rain
# column itself is excluded via the anchored regex "^rain$"
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfRainFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                      yVar="rain", 
                      xVars=c(varsTrain[!str_detect(varsTrain, "^rain$")]), 
                      dfTest=allCity %>% filter(tt=="test", year==2022), 
                      useLabel=keyLabel, 
                      useSub=stringr::str_to_sentence(keyLabel), 
                      isContVar=TRUE,
                      rndTo=-1L,
                      refXY=TRUE,
                      returnData=TRUE
                      )
## Growing trees.. Progress: 28%. Estimated remaining time: 1 minute, 21 seconds.
## Growing trees.. Progress: 58%. Estimated remaining time: 44 seconds.
## Growing trees.. Progress: 87%. Estimated remaining time: 13 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.386% (RMSE 0.16 vs. 0.6 null)
## `geom_smooth()` using formula = 'y ~ x'

The model is effective at predicting rain, primarily by leveraging the highly associated predictors precipitation and weather code. The model generally under-predicts high rainfall observations.

A similar process is run using the linear model:

# Eliminate diffuse radiation due to rank-deficiency
# Linear model predicting rain from all remaining training variables
lmRainFull <- lm(rain ~ ., 
                 data=allCity %>% 
                     filter(tt=="train", year<2022) %>% 
                     select(all_of(varsTrain)) %>% 
                     select(-diffuse_radiation)
                 )
summary(lmRainFull)
## 
## Call:
## lm(formula = rain ~ ., data = allCity %>% filter(tt == "train", 
##     year < 2022) %>% select(all_of(varsTrain)) %>% select(-diffuse_radiation))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.006873 -0.000317  0.000062  0.000344  0.095935 
## 
## Coefficients:
##                                 Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)                    7.313e-02  4.414e-03     16.567  < 2e-16 ***
## hour                          -1.486e-06  1.159e-06     -1.282 0.199932    
## temperature_2m                -2.797e-04  1.101e-05    -25.397  < 2e-16 ***
## relativehumidity_2m           -1.799e-05  1.105e-06    -16.280  < 2e-16 ***
## dewpoint_2m                    2.025e-05  3.676e-06      5.508 3.63e-08 ***
## apparent_temperature           1.623e-04  9.273e-06     17.498  < 2e-16 ***
## pressure_msl                  -1.817e-05  1.645e-06    -11.047  < 2e-16 ***
## surface_pressure               7.054e-06  7.325e-07      9.630  < 2e-16 ***
## precipitation                  9.991e-01  1.545e-05  64674.862  < 2e-16 ***
## snowfall                      -1.428e+00  1.367e-04 -10451.677  < 2e-16 ***
## cloudcover                     1.009e-05  7.635e-07     13.217  < 2e-16 ***
## cloudcover_low                -8.663e-06  5.741e-07    -15.091  < 2e-16 ***
## cloudcover_mid                -5.018e-06  4.421e-07    -11.351  < 2e-16 ***
## cloudcover_high               -5.233e-06  2.753e-07    -19.013  < 2e-16 ***
## shortwave_radiation            2.791e-08  1.715e-07      0.163 0.870683    
## direct_radiation              -1.606e-07  1.831e-07     -0.877 0.380283    
## direct_normal_irradiance       1.079e-07  6.804e-08      1.585 0.112955    
## windspeed_10m                  4.659e-05  4.417e-06     10.548  < 2e-16 ***
## windspeed_100m                -1.024e-05  2.782e-06     -3.681 0.000232 ***
## winddirection_10m             -7.432e-08  1.125e-07     -0.661 0.508879    
## winddirection_100m            -1.548e-07  1.135e-07     -1.365 0.172369    
## windgusts_10m                 -1.217e-05  1.496e-06     -8.139 4.01e-16 ***
## et0_fao_evapotranspiration    -7.852e-04  1.987e-04     -3.951 7.77e-05 ***
## weathercode                    6.502e-05  5.875e-07    110.678  < 2e-16 ***
## vapor_pressure_deficit         5.257e-04  2.153e-05     24.415  < 2e-16 ***
## soil_temperature_0_to_7cm      5.228e-06  3.289e-06      1.590 0.111923    
## soil_temperature_7_to_28cm    -5.662e-06  5.539e-06     -1.022 0.306660    
## soil_temperature_28_to_100cm  -5.370e-06  6.121e-06     -0.877 0.380292    
## soil_temperature_100_to_255cm  2.837e-07  3.154e-06      0.090 0.928331    
## soil_moisture_0_to_7cm        -2.339e-03  1.891e-04    -12.370  < 2e-16 ***
## soil_moisture_7_to_28cm        1.420e-03  2.696e-04      5.268 1.38e-07 ***
## soil_moisture_28_to_100cm      1.086e-04  2.032e-04      0.535 0.592908    
## soil_moisture_100_to_255cm     2.417e-04  1.998e-04      1.210 0.226269    
## year                          -2.917e-05  2.129e-06    -13.700  < 2e-16 ***
## doy                           -3.149e-07  8.502e-08     -3.704 0.000212 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004157 on 368075 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 1.808e+08 on 34 and 368075 DF,  p-value: < 2.2e-16
# Holdout performance of the full linear model on 2022 test data
allCity %>% 
    filter(tt=="test", year==2022) %>%
    mutate(pred=predict(lmRainFull, newdata=.)) %>%
    summarize(meModel=mean((pred-rain)**2), # model mean squared error
              meBase=mean((rain-mean(rain))**2), # null (mean-only) MSE
              r2=1-meModel/meBase, 
              rmse=sqrt(meModel)
              )
## # A tibble: 1 × 4
##      meModel meBase    r2    rmse
##        <dbl>  <dbl> <dbl>   <dbl>
## 1 0.00000172  0.354  1.00 0.00131
# Rank the full rain model's coefficients by absolute t-statistic
summary(lmRainFull)$coefficients %>% 
    as.data.frame() %>% 
    rownames_to_column("Variable") %>% 
    tibble::as_tibble() %>% 
    arrange(desc(abs(`t value`)))
## # A tibble: 35 × 5
##    Variable                  Estimate `Std. Error` `t value` `Pr(>|t|)`
##    <chr>                        <dbl>        <dbl>     <dbl>      <dbl>
##  1 precipitation           0.999       0.0000154     64675.   0        
##  2 snowfall               -1.43        0.000137     -10452.   0        
##  3 weathercode             0.0000650   0.000000588     111.   0        
##  4 temperature_2m         -0.000280    0.0000110       -25.4  3.65e-142
##  5 vapor_pressure_deficit  0.000526    0.0000215        24.4  1.50e-131
##  6 cloudcover_high        -0.00000523  0.000000275     -19.0  1.46e- 80
##  7 apparent_temperature    0.000162    0.00000927       17.5  1.59e- 68
##  8 (Intercept)             0.0731      0.00441          16.6  1.26e- 61
##  9 relativehumidity_2m    -0.0000180   0.00000110      -16.3  1.45e- 59
## 10 cloudcover_low         -0.00000866  0.000000574     -15.1  1.92e- 51
## # ℹ 25 more rows

The linear model has very strong explanatory and predictive power. Rain (mm) appears to be defined in the raw data as precipitation (mm) minus snowfall (cm) divided by 0.7 — i.e., rain = precipitation − snowfall/0.7 — consistent with the fitted snowfall coefficient of approximately −1.43 (≈ −1/0.7):

# Eliminate diffuse radiation due to rank-deficiency
# Reduced linear model: rain as a function of precipitation and snowfall only
lmRainTwo <- lm(rain ~ precipitation + snowfall, 
                 data=allCity %>% 
                     filter(tt=="train", year<2022) %>% 
                     select(all_of(varsTrain)) %>% 
                     select(-diffuse_radiation)
                 )
summary(lmRainTwo)
## 
## Call:
## lm(formula = rain ~ precipitation + snowfall, data = allCity %>% 
##     filter(tt == "train", year < 2022) %>% select(all_of(varsTrain)) %>% 
##     select(-diffuse_radiation))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.019490 -0.000178 -0.000178 -0.000178  0.099488 
## 
## Coefficients:
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)    1.777e-04  7.136e-06     24.9   <2e-16 ***
## precipitation  9.999e-01  1.307e-05  76484.8   <2e-16 ***
## snowfall      -1.424e+00  1.343e-04 -10602.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004261 on 368107 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 2.925e+09 on 2 and 368107 DF,  p-value: < 2.2e-16
# Holdout performance of the two-predictor linear model on 2022 test data
allCity %>% 
    filter(tt=="test", year==2022) %>%
    mutate(pred=predict(lmRainTwo, newdata=.)) %>%
    summarize(meModel=mean((pred-rain)**2), # model mean squared error
              meBase=mean((rain-mean(rain))**2), # null (mean-only) MSE
              r2=1-meModel/meBase, 
              rmse=sqrt(meModel)
              )
## # A tibble: 1 × 4
##       meModel meBase    r2     rmse
##         <dbl>  <dbl> <dbl>    <dbl>
## 1 0.000000833  0.354  1.00 0.000913
# Rank the two-predictor rain model's coefficients by absolute t-statistic
summary(lmRainTwo)$coefficients %>% 
    as.data.frame() %>% 
    rownames_to_column("Variable") %>% 
    tibble::as_tibble() %>% 
    arrange(desc(abs(`t value`)))
## # A tibble: 3 × 5
##   Variable       Estimate `Std. Error` `t value` `Pr(>|t|)`
##   <chr>             <dbl>        <dbl>     <dbl>      <dbl>
## 1 precipitation  1.00       0.0000131    76485.   0        
## 2 snowfall      -1.42       0.000134    -10603.   0        
## 3 (Intercept)    0.000178   0.00000714      24.9  8.99e-137

The random forest model is re-run to predict rain, using only precipitation and snowfall as predictors:

# Random forest predicting rain from precipitation and snowfall only;
# keep the holdout prediction table
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfRainTwo <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                       yVar="rain", 
                       xVars=c("precipitation", "snowfall"), 
                       dfTest=allCity %>% filter(tt=="test", year==2022), 
                       useLabel=keyLabel, 
                       useSub=stringr::str_to_sentence(keyLabel), 
                       isContVar=TRUE,
                       rndTo=-1L,
                       mtry=2,
                       refXY=TRUE,
                       returnData=TRUE
                       )[["tstPred"]] # holdout predictions element

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.996% (RMSE 0 vs. 0.6 null)
## `geom_smooth()` using formula = 'y ~ x'

The random forest model is similarly effective at predicting rain using precipitation and snowfall

All combinations of two variables are explored for predicting rain on a smaller training dataset:

# Train and test data created previously (dfTrainCloud and dfTestCloud)
# Variables to explore: everything except rain itself, plus month and tod
possRainVars <- c(varsTrain[!str_detect(varsTrain, "rain")], "month", "tod")

# Subsets to use: a fixed 5,000-row random sample of the training data
# (seed and sample call unchanged so the same rows are drawn)
set.seed(24081818)
idxSmallRain <- sample(1:nrow(dfTrainCloud), 5000, replace=FALSE)

# Preallocate one row per 2-variable combination and fill via a row counter,
# instead of growing the matrix with rbind() on every iteration; iteration
# order and resulting contents are unchanged
mtxSmallRain <- matrix(nrow=choose(length(possRainVars), 2), ncol=3)
iRowRain <- 0L

for(idx1 in 1:(length(possRainVars)-1)) {
    for(idx2 in (idx1+1):length(possRainVars)) {
        # Holdout R-squared for this pair of predictors
        r2SmallRain <- runFullRF(dfTrain=dfTrainCloud[idxSmallRain,], 
                                 yVar="rain", 
                                 xVars=possRainVars[c(idx1, idx2)], 
                                 dfTest=dfTestCloud, 
                                 useLabel=keyLabel, 
                                 useSub=stringr::str_to_sentence(keyLabel), 
                                 isContVar=TRUE,
                                 mtry=2,
                                 makePlots=FALSE,
                                 returnData=TRUE
                                 )[["rfAcc"]][["r2"]]
        iRowRain <- iRowRain + 1L
        mtxSmallRain[iRowRain, ] <- c(idx1, idx2, r2SmallRain)
    }
}
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.708% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.364% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.644% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.527% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.8% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.14% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.315% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.238% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.015% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.062% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.62% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.133% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.818% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.185% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.803% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.037% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -45.453% (RMSE 0.72 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.942% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.651% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.902% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.551% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.19% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.312% (RMSE 0.22 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.804% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.522% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.274% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.98% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.79% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.428% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.342% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.33% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.011% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.574% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.104% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.375% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.238% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.208% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.125% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.098% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.15% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.43% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.975% (RMSE 0.13 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.921% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.4% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.092% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.524% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.772% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.088% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.517% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.366% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.742% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.821% (RMSE 0.68 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.394% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.131% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.84% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.915% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.407% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.987% (RMSE 0.25 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.187% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.972% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.826% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.226% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.239% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.906% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.736% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.9% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.198% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.943% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.016% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.189% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.242% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.323% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.372% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.186% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.173% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.443% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.307% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.599% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.027% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.808% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.286% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.044% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.833% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.791% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.448% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.832% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.789% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.799% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.871% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.185% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.77% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.678% (RMSE 0.2 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.407% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.922% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.738% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.599% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.665% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.126% (RMSE 0.55 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.29% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.634% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.639% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.165% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.71% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.893% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.044% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.063% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.414% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.088% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.037% (RMSE 0.13 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.79% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.708% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.024% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.345% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -33.817% (RMSE 0.69 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.344% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.161% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.599% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.102% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.362% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.238% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.514% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.183% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.826% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.657% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.715% (RMSE 0.22 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.399% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.817% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.993% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.805% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.859% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.722% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.108% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.589% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.569% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.725% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.537% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.506% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.48% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.449% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.126% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.156% (RMSE 0.13 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.466% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.861% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.051% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.405% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -36.49% (RMSE 0.7 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.977% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.457% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.062% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.544% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -31.802% (RMSE 0.68 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.301% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.403% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.426% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.78% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.019% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.661% (RMSE 0.23 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.346% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.752% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.89% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.526% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.745% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.334% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.451% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.14% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.716% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.952% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.259% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -29.723% (RMSE 0.68 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.958% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.639% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.148% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.518% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.274% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.449% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.771% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.258% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.174% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.566% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.01% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.585% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -36.052% (RMSE 0.69 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.431% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.672% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.394% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.781% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.309% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.167% (RMSE 0.22 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.549% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.129% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.978% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.526% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.96% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.655% (RMSE 0.56 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.396% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.061% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.684% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.571% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.848% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.935% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.847% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.183% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.295% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.492% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.311% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.759% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.675% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.074% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.328% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.684% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.167% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -31.574% (RMSE 0.68 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.041% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.254% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.82% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.286% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.879% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.106% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.934% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.516% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.27% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.794% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.981% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.732% (RMSE 0.56 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.666% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.379% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.892% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.327% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.89% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.27% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.231% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.739% (RMSE 0.12 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.22% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.78% (RMSE 0.15 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.591% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.082% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.849% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.663% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.641% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.564% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.31% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.132% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.048% (RMSE 0.15 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.211% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.311% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.628% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.838% (RMSE 0.12 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.193% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.261% (RMSE 0.13 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.771% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.33% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.739% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.586% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.672% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.94% (RMSE 0.15 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.798% (RMSE 0.15 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.524% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.681% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.995% (RMSE 0.13 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.749% (RMSE 0.14 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.102% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.927% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.167% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.709% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.91% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.015% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.774% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.255% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -35.021% (RMSE 0.69 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.885% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.058% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.235% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.89% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.082% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.162% (RMSE 0.2 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.042% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.738% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.233% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.759% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.056% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.861% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.273% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.026% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.525% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.032% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.555% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.278% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.023% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.194% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.207% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.151% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.624% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.509% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.748% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.045% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.414% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.501% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.66% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.694% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.881% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.113% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.347% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.765% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.484% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.305% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.325% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.317% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.556% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.57% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.04% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.196% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.656% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.947% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.516% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.378% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.365% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.944% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.254% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.26% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.001% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.588% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -29.465% (RMSE 0.68 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.268% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.283% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.313% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.633% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.452% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.321% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.292% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.935% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.664% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.652% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.11% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.543% (RMSE 0.55 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.02% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.747% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.317% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.022% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.64% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.655% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.675% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.036% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.628% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.867% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.901% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.045% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.326% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.851% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.478% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.335% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.372% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.372% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.702% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.003% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.221% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.726% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.002% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -56.812% (RMSE 0.75 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.517% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.983% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.037% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.347% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.863% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.681% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.205% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.326% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.862% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.745% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.577% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.088% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -35.675% (RMSE 0.69 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.836% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.307% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.54% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.295% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.498% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.83% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.623% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.216% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.027% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.408% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -34.21% (RMSE 0.69 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.332% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.948% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.122% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.149% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.175% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.482% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.99% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.615% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.577% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.123% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.544% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -39.604% (RMSE 0.7 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.294% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.701% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.565% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.206% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.214% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.898% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.94% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.474% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.946% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.021% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.088% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.802% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.137% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.868% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.956% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.918% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.208% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.644% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.329% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.378% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.515% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -38.13% (RMSE 0.7 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.453% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.524% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.165% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.656% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.258% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.748% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.006% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.463% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.892% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.735% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.204% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.084% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.278% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.049% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.398% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.334% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.547% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.402% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.671% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -40.408% (RMSE 0.71 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.773% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.251% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.059% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.873% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.135% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.931% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.931% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.117% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.976% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.042% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.221% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.119% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.371% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.54% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.609% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.267% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.766% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.849% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.876% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -40.608% (RMSE 0.71 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.277% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.319% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.651% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.016% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.533% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.918% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.547% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.608% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.08% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.426% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.57% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.663% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.224% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.071% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.241% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.62% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.198% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.357% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.681% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.119% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -49.33% (RMSE 0.73 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -47.749% (RMSE 0.72 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.401% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -41.853% (RMSE 0.71 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.311% (RMSE 0.24 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.741% (RMSE 0.68 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -36.112% (RMSE 0.69 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.547% (RMSE 0.68 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.161% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.933% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.136% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.815% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.567% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -42.857% (RMSE 0.71 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -43.301% (RMSE 0.71 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -47.053% (RMSE 0.72 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -40.644% (RMSE 0.71 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.408% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.102% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.942% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.669% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.261% (RMSE 0.24 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.196% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.03% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.578% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.625% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.179% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.653% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.926% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.119% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.589% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.943% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.878% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.292% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.664% (RMSE 0.66 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.579% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.605% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.39% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.519% (RMSE 0.23 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.04% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.27% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.966% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.675% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.847% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.838% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.636% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.903% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.034% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.026% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.164% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.341% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.195% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.698% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.157% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.449% (RMSE 0.22 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.315% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.657% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.057% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.331% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.712% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.398% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.795% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.446% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.251% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.063% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.173% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.811% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.81% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.442% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.023% (RMSE 0.22 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.746% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.173% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.016% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.312% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.152% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.399% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.208% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.484% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.442% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.659% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.777% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.176% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.664% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.603% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.177% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.705% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.456% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.806% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.474% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.472% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.948% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.699% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.91% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.437% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.822% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.805% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.291% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.684% (RMSE 0.2 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.004% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.526% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.08% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.94% (RMSE 0.2 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.682% (RMSE 0.19 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.134% (RMSE 0.23 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.99% (RMSE 0.24 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.626% (RMSE 0.23 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.119% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.115% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.417% (RMSE 0.2 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.606% (RMSE 0.21 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.885% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.636% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.918% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.148% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.989% (RMSE 0.55 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.883% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.67% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.727% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.253% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.88% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.735% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.881% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.427% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.531% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.677% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.241% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.862% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.678% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.025% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.166% (RMSE 0.65 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.656% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.45% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.541% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.924% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.197% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.496% (RMSE 0.58 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.568% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.988% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.457% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.771% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.285% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.895% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.882% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.326% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.06% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.223% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.521% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.941% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.584% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.621% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.372% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.206% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.126% (RMSE 0.56 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.249% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.529% (RMSE 0.67 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.629% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.391% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.366% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.132% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.104% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.9% (RMSE 0.53 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.785% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.492% (RMSE 0.55 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.872% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.157% (RMSE 0.57 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.399% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.882% (RMSE 0.59 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.54% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.355% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.886% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.123% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.996% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.486% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.089% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.524% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.976% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.478% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.69% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.083% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.142% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.54% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.392% (RMSE 0.62 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.469% (RMSE 0.64 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.912% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.144% (RMSE 0.6 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.691% (RMSE 0.61 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.73% (RMSE 0.63 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.602% (RMSE 0.6 vs. 0.6 null)

Predictive success by metric is explored:

# Convert the pairwise R-squared matrix into a tibble and attach the
# human-readable predictor names for each index pair
dfSmallR2Rain <- mtxSmallRain %>%
    as.data.frame() %>%
    purrr::set_names(c("idx1", "idx2", "r2")) %>%
    tibble::as_tibble() %>%
    mutate(
        var1 = possRainVars[idx1],
        var2 = possRainVars[idx2],
        rn = row_number()
    )
# Show the 20 strongest two-predictor combinations
dfSmallR2Rain %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n = 20)
## # A tibble: 666 × 3
##    var1                 var2                             r2
##    <chr>                <chr>                         <dbl>
##  1 precipitation        weathercode                   0.958
##  2 precipitation        snowfall                      0.957
##  3 precipitation        soil_temperature_0_to_7cm     0.953
##  4 apparent_temperature precipitation                 0.952
##  5 dewpoint_2m          precipitation                 0.950
##  6 precipitation        month                         0.950
##  7 temperature_2m       precipitation                 0.950
##  8 precipitation        shortwave_radiation           0.948
##  9 precipitation        soil_temperature_7_to_28cm    0.948
## 10 precipitation        tod                           0.947
## 11 precipitation        soil_temperature_100_to_255cm 0.947
## 12 precipitation        doy                           0.947
## 13 precipitation        soil_moisture_7_to_28cm       0.947
## 14 precipitation        direct_radiation              0.947
## 15 precipitation        direct_normal_irradiance      0.946
## 16 precipitation        et0_fao_evapotranspiration    0.946
## 17 precipitation        cloudcover_mid                0.946
## 18 precipitation        soil_moisture_0_to_7cm        0.946
## 19 precipitation        diffuse_radiation             0.946
## 20 precipitation        year                          0.945
## # ℹ 646 more rows
# For every predictor, summarize the min/mean/max holdout R-squared across
# all pairs it appears in, then plot each variable's range ordered by mean
dfSmallR2Rain %>%
    pivot_longer(cols = c(var1, var2)) %>%
    group_by(value) %>%
    summarize(
        r2_min = min(r2),
        r2_mu = mean(r2),
        r2_max = max(r2)
    ) %>%
    ggplot(aes(x = fct_reorder(value, r2_mu))) +
    coord_flip() +
    geom_point(aes(y = r2_mu)) +
    geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
    lims(y = c(NA, 1)) +
    # Dashed reference line at the theoretical maximum R-squared of 1
    geom_hline(yintercept = 1, lty = 2, color = "red") +
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting rain", 
         y="Range of R-squared (min-mean-max)", 
         x=NULL
    )

# Re-rank the pairs after dropping any pair that includes 'precipitation',
# which otherwise dominates the top of the list
dfSmallR2Rain %>%
    filter(var1 != "precipitation", var2 != "precipitation") %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n = 20)
## # A tibble: 630 × 3
##    var1                       var2                             r2
##    <chr>                      <chr>                         <dbl>
##  1 weathercode                soil_moisture_0_to_7cm        0.897
##  2 weathercode                soil_temperature_100_to_255cm 0.889
##  3 weathercode                vapor_pressure_deficit        0.887
##  4 relativehumidity_2m        weathercode                   0.887
##  5 weathercode                month                         0.884
##  6 snowfall                   weathercode                   0.882
##  7 weathercode                year                          0.881
##  8 weathercode                doy                           0.881
##  9 direct_normal_irradiance   weathercode                   0.879
## 10 diffuse_radiation          weathercode                   0.879
## 11 shortwave_radiation        weathercode                   0.879
## 12 cloudcover_high            weathercode                   0.878
## 13 direct_radiation           weathercode                   0.877
## 14 cloudcover_mid             weathercode                   0.877
## 15 weathercode                tod                           0.876
## 16 et0_fao_evapotranspiration weathercode                   0.876
## 17 weathercode                soil_temperature_7_to_28cm    0.875
## 18 cloudcover                 weathercode                   0.873
## 19 cloudcover_low             weathercode                   0.873
## 20 surface_pressure           weathercode                   0.871
## # ℹ 610 more rows
# Repeat the per-variable R-squared range plot after dropping every pair that
# involves 'precipitation' or 'weathercode' (the two dominant predictors),
# so the remaining variables' predictive power is visible on its own scale.
dfSmallR2Rain %>% 
    filter(var2!="precipitation", var1!="precipitation", var2!="weathercode", var1!="weathercode") %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    coord_flip() + 
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    lims(y=c(NA, 1)) + 
    # Dashed reference line at the theoretical maximum R-squared of 1
    geom_hline(yintercept=1, lty=2, color="red") +
    # BUG FIX: subtitle previously said "Predicting cloud cover", a copy-paste
    # leftover from the cloud-cover section; this chunk predicts rain
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting rain (excluding variable paired with 'precipitation' or 'weathercode')", 
         y="Range of R-squared (min-mean-max)", 
         x=NULL
    )

# Top-20 ranking with both dominant predictors excluded from every pair
dropVars <- c("precipitation", "weathercode")
dfSmallR2Rain %>%
    filter(!var1 %in% dropVars, !var2 %in% dropVars) %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n = 20)
## # A tibble: 595 × 3
##    var1                          var2                           r2
##    <chr>                         <chr>                       <dbl>
##  1 soil_moisture_0_to_7cm        soil_moisture_7_to_28cm    0.199 
##  2 vapor_pressure_deficit        soil_moisture_0_to_7cm     0.150 
##  3 cloudcover_low                soil_moisture_0_to_7cm     0.145 
##  4 soil_moisture_0_to_7cm        soil_moisture_100_to_255cm 0.135 
##  5 relativehumidity_2m           soil_moisture_0_to_7cm     0.131 
##  6 soil_temperature_100_to_255cm soil_moisture_0_to_7cm     0.121 
##  7 surface_pressure              soil_moisture_0_to_7cm     0.117 
##  8 pressure_msl                  soil_moisture_0_to_7cm     0.107 
##  9 soil_moisture_0_to_7cm        soil_moisture_28_to_100cm  0.0979
## 10 windgusts_10m                 soil_moisture_0_to_7cm     0.0840
## 11 soil_moisture_0_to_7cm        doy                        0.0816
## 12 windspeed_10m                 soil_moisture_0_to_7cm     0.0814
## 13 soil_temperature_28_to_100cm  soil_moisture_0_to_7cm     0.0806
## 14 temperature_2m                soil_moisture_0_to_7cm     0.0791
## 15 dewpoint_2m                   soil_moisture_0_to_7cm     0.0772
## 16 windspeed_100m                soil_moisture_0_to_7cm     0.0765
## 17 cloudcover                    soil_moisture_0_to_7cm     0.0756
## 18 direct_radiation              soil_moisture_0_to_7cm     0.0720
## 19 direct_normal_irradiance      soil_moisture_0_to_7cm     0.0712
## 20 snowfall                      soil_moisture_0_to_7cm     0.0686
## # ℹ 575 more rows

Precipitation and weather code are highly predictive of rainfall, but most other predictors yield near-zero or even negative R-squared when applied to the test dataset.

Select combinations are explored using the full training dataset:

# Shortlist of predictors for the full-training-data random forest runs,
# chosen from the strongest performers in the small-sample sweep above
possLargeRain <- c(
    "precipitation",
    "weathercode",
    "snowfall",
    "soil_moisture_0_to_7cm"
)
# Echo the shortlist
possLargeRain
## [1] "precipitation"          "weathercode"            "snowfall"              
## [4] "soil_moisture_0_to_7cm"
# Fit a random forest on the full training data for every unordered pair of
# the shortlisted predictors, collecting holdout R-squared per pair.
#
# Preallocate the result matrix (one row per pair) instead of growing it
# with rbind() inside the loop, which copies the matrix on every iteration.
nLargeRain <- length(possLargeRain)
mtxLargeRain <- matrix(NA_real_, nrow = choose(nLargeRain, 2), ncol = 3)
rowLargeRain <- 0L

# seq_len()/seq() instead of 1:(n-1), which misbehaves when n <= 1
for (idx1 in seq_len(nLargeRain - 1L)) {
    for (idx2 in seq(idx1 + 1L, nLargeRain)) {
        # NOTE(review): dfTrainCloud/dfTestCloud are reused here with
        # yVar="rain" -- presumably the same frames carry both targets;
        # confirm this is not a leftover from the cloud-cover section.
        r2LargeRain <- runFullRF(dfTrain=dfTrainCloud[,], 
                                 yVar="rain", 
                                 xVars=possLargeRain[c(idx1, idx2)], 
                                 dfTest=dfTestCloud,
                                 useLabel=keyLabel, 
                                 useSub=stringr::str_to_sentence(keyLabel), 
                                 isContVar=TRUE,
                                 mtry=2,
                                 makePlots=FALSE,
                                 returnData=TRUE
                                 )[["rfAcc"]][["r2"]]
        # Record (idx1, idx2, r2) in the next preallocated row
        rowLargeRain <- rowLargeRain + 1L
        mtxLargeRain[rowLargeRain, ] <- c(idx1, idx2, r2LargeRain)
    }
}
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.988% (RMSE 0.01 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.997% (RMSE 0 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.565% (RMSE 0.07 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.48% (RMSE 0.2 vs. 0.6 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.942% (RMSE 0.2 vs. 0.6 null)
## Growing trees.. Progress: 80%. Estimated remaining time: 7 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.938% (RMSE 0.53 vs. 0.6 null)
# Attach readable variable names to the pair results and rank the pairs by
# holdout R-squared (top 20 shown)
dfLargeR2Rain <- mtxLargeRain %>%
    as.data.frame() %>%
    purrr::set_names(c("idx1", "idx2", "r2")) %>%
    tibble::as_tibble() %>%
    mutate(var1=possLargeRain[idx1], var2=possLargeRain[idx2], rn=row_number())
dfLargeR2Rain %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n=20)
## # A tibble: 6 × 3
##   var1          var2                      r2
##   <chr>         <chr>                  <dbl>
## 1 precipitation snowfall               1.00 
## 2 precipitation weathercode            1.00 
## 3 precipitation soil_moisture_0_to_7cm 0.986
## 4 weathercode   soil_moisture_0_to_7cm 0.889
## 5 weathercode   snowfall               0.885
## 6 snowfall      soil_moisture_0_to_7cm 0.219

In contrast to previous models, R2 for predicting rain is significantly improved by access to a much larger training dataset

A model is run to predict snowfall, at first allowing precipitation and rain as predictors:

# Random forest predicting snowfall from every training variable except
# snowfall itself; precipitation and rain remain available as predictors.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfSnowFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                        yVar="snowfall", 
                        xVars=c(varsTrain[!str_detect(varsTrain, "^snowfall$")]), # anchored: drop only the exact name "snowfall"
                        dfTest=allCity %>% filter(tt=="test", year==2022), 
                        useLabel=keyLabel, 
                        useSub=stringr::str_to_sentence(keyLabel), 
                        isContVar=TRUE, # continuous target: R-squared/RMSE reported
                        rndTo=-1L,
                        refXY=TRUE,
                        returnData=TRUE
                        )
## Growing trees.. Progress: 41%. Estimated remaining time: 43 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.034% (RMSE 0.01 vs. 0.04 null)
## `geom_smooth()` using formula = 'y ~ x'

The model is reasonably effective at predicting snowfall, primarily by leveraging highly associated predictors precipitation and weather code. The model generally under-predicts high snowfall observations

A similar process is run using the linear model:

# Linear-model benchmark: snowfall regressed on all training variables.
# diffuse_radiation is dropped because it makes the fit rank-deficient
# (presumably collinear with the other radiation columns -- TODO confirm);
# weathercode is converted to a factor so each code gets its own coefficient.
lmSnowFull <- lm(snowfall ~ ., 
                 data=allCity %>% 
                     filter(tt=="train", year<2022) %>% 
                     mutate(weathercode=factor(weathercode)) %>%
                     select(all_of(varsTrain)) %>% 
                     select(-diffuse_radiation)
                 )
summary(lmSnowFull)
## 
## Call:
## lm(formula = snowfall ~ ., data = allCity %>% filter(tt == "train", 
##     year < 2022) %>% mutate(weathercode = factor(weathercode)) %>% 
##     select(all_of(varsTrain)) %>% select(-diffuse_radiation))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.020446 -0.000074 -0.000007  0.000065  0.075432 
## 
## Coefficients:
##                                 Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)                    3.268e-02  2.648e-03    12.341  < 2e-16 ***
## hour                          -3.658e-07  6.946e-07    -0.527 0.598452    
## temperature_2m                -3.363e-05  6.624e-06    -5.077 3.84e-07 ***
## relativehumidity_2m           -4.568e-06  6.629e-07    -6.891 5.54e-12 ***
## dewpoint_2m                    3.111e-06  2.204e-06     1.411 0.158225    
## apparent_temperature           1.942e-05  5.567e-06     3.489 0.000486 ***
## pressure_msl                  -1.782e-06  9.866e-07    -1.807 0.070819 .  
## surface_pressure              -1.605e-06  4.395e-07    -3.652 0.000261 ***
## precipitation                  6.845e-01  1.434e-04  4774.330  < 2e-16 ***
## rain                          -6.848e-01  1.456e-04 -4701.844  < 2e-16 ***
## cloudcover                     8.165e-06  7.226e-07    11.298  < 2e-16 ***
## cloudcover_low                -5.583e-06  3.729e-07   -14.972  < 2e-16 ***
## cloudcover_mid                -4.398e-06  2.786e-07   -15.789  < 2e-16 ***
## cloudcover_high               -2.148e-06  1.763e-07   -12.183  < 2e-16 ***
## shortwave_radiation            3.950e-07  1.028e-07     3.841 0.000123 ***
## direct_radiation              -3.277e-07  1.099e-07    -2.983 0.002857 ** 
## direct_normal_irradiance      -2.862e-08  4.077e-08    -0.702 0.482691    
## windspeed_10m                 -1.466e-05  2.655e-06    -5.520 3.39e-08 ***
## windspeed_100m                 5.995e-06  1.668e-06     3.593 0.000327 ***
## winddirection_10m             -1.023e-07  6.741e-08    -1.517 0.129156    
## winddirection_100m            -8.782e-08  6.798e-08    -1.292 0.196407    
## windgusts_10m                  4.359e-06  8.984e-07     4.852 1.22e-06 ***
## et0_fao_evapotranspiration    -2.485e-04  1.191e-04    -2.087 0.036930 *  
## weathercode1                  -3.130e-05  1.771e-05    -1.768 0.077143 .  
## weathercode2                  -8.146e-05  3.183e-05    -2.559 0.010504 *  
## weathercode3                  -3.591e-05  4.193e-05    -0.856 0.391780    
## weathercode51                  8.037e-05  3.765e-05     2.135 0.032762 *  
## weathercode53                  2.770e-04  4.986e-05     5.557 2.75e-08 ***
## weathercode55                  4.158e-04  7.061e-05     5.889 3.90e-09 ***
## weathercode61                  6.275e-04  6.800e-05     9.228  < 2e-16 ***
## weathercode63                  1.233e-03  1.118e-04    11.027  < 2e-16 ***
## weathercode65                  3.267e-03  3.062e-04    10.668  < 2e-16 ***
## weathercode71                  2.160e-02  6.684e-05   323.091  < 2e-16 ***
## weathercode73                  8.795e-03  1.088e-04    80.865  < 2e-16 ***
## weathercode75                  2.948e-02  3.019e-04    97.647  < 2e-16 ***
## vapor_pressure_deficit         2.460e-05  1.296e-05     1.899 0.057580 .  
## soil_temperature_0_to_7cm      1.165e-06  1.971e-06     0.591 0.554292    
## soil_temperature_7_to_28cm     3.702e-06  3.319e-06     1.115 0.264681    
## soil_temperature_28_to_100cm  -2.414e-06  3.667e-06    -0.658 0.510354    
## soil_temperature_100_to_255cm  7.237e-07  1.890e-06     0.383 0.701766    
## soil_moisture_0_to_7cm         2.170e-04  1.137e-04     1.909 0.056317 .  
## soil_moisture_7_to_28cm       -7.146e-05  1.618e-04    -0.442 0.658664    
## soil_moisture_28_to_100cm     -1.393e-05  1.218e-04    -0.114 0.908943    
## soil_moisture_100_to_255cm     6.144e-04  1.197e-04     5.131 2.89e-07 ***
## year                          -1.441e-05  1.277e-06   -11.287  < 2e-16 ***
## doy                           -1.886e-08  5.095e-08    -0.370 0.711314    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002491 on 368064 degrees of freedom
## Multiple R-squared:  0.9978, Adjusted R-squared:  0.9978 
## F-statistic: 3.674e+06 on 45 and 368064 DF,  p-value: < 2.2e-16
# Holdout (2022 test-set) evaluation of the full linear model: model MSE,
# null-model MSE, R-squared, and RMSE. weathercode must be factored before
# predict() so the levels match the fitted model.
allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)) %>%
    mutate(pred = predict(lmSnowFull, newdata = .)) %>%
    summarize(meModel = mean((pred - snowfall)^2),
              meBase = mean((snowfall - mean(snowfall))^2),
              r2 = 1 - meModel / meBase,
              rmse = sqrt(meModel))
## # A tibble: 1 × 4
##      meModel  meBase    r2    rmse
##        <dbl>   <dbl> <dbl>   <dbl>
## 1 0.00000244 0.00193 0.999 0.00156
# Coefficient table of the full model, sorted by |t| so the dominant
# predictors appear first. coef() on a summary.lm returns the same
# coefficient matrix as $coefficients.
coef(summary(lmSnowFull)) %>%
    as.data.frame() %>%
    rownames_to_column("Variable") %>%
    tibble::as_tibble() %>%
    arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
##    Variable           Estimate `Std. Error` `t value` `Pr(>|t|)`
##    <chr>                 <dbl>        <dbl>     <dbl>      <dbl>
##  1 precipitation    0.685       0.000143       4774.    0       
##  2 rain            -0.685       0.000146      -4702.    0       
##  3 weathercode71    0.0216      0.0000668       323.    0       
##  4 weathercode75    0.0295      0.000302         97.6   0       
##  5 weathercode73    0.00879     0.000109         80.9   0       
##  6 cloudcover_mid  -0.00000440  0.000000279     -15.8   3.84e-56
##  7 cloudcover_low  -0.00000558  0.000000373     -15.0   1.16e-50
##  8 (Intercept)      0.0327      0.00265          12.3   5.54e-35
##  9 cloudcover_high -0.00000215  0.000000176     -12.2   3.90e-34
## 10 cloudcover       0.00000816  0.000000723      11.3   1.36e-29
## # ℹ 36 more rows

Even with many confounders, the linear model largely identifies that precipitation and rain predict snowfall. As well, the linear model identifies weather codes 71, 73, and 75 which each mean that snow is falling

The linear model has very strong explanatory and predictive power. Snowfall (cm) appears defined in the raw data as 0.7 * (precipitation (mm) minus rain (mm)):

# Two-predictor model using only the best predictors. The fitted slopes of
# roughly +0.7 (precipitation) and -0.7 (rain) imply
# snowfall (cm) ~= 0.7 * (precipitation (mm) - rain (mm)) in the raw data.
lmSnowTwo <- lm(snowfall ~ precipitation + rain, 
                data=allCity %>% 
                    filter(tt=="train", year<2022) %>% 
                    select(all_of(varsTrain)) %>% 
                    select(-diffuse_radiation)
                )
summary(lmSnowTwo)
## 
## Call:
## lm(formula = snowfall ~ precipitation + rain, data = allCity %>% 
##     filter(tt == "train", year < 2022) %>% select(all_of(varsTrain)) %>% 
##     select(-diffuse_radiation))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.000220 -0.000131 -0.000131 -0.000131  0.069869 
## 
## Coefficients:
##                 Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)    1.313e-04  5.003e-06     26.25   <2e-16 ***
## precipitation  7.000e-01  6.535e-05  10712.68   <2e-16 ***
## rain          -7.001e-01  6.603e-05 -10602.88   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002988 on 368107 degrees of freedom
## Multiple R-squared:  0.9968, Adjusted R-squared:  0.9968 
## F-statistic: 5.738e+07 on 2 and 368107 DF,  p-value: < 2.2e-16
# Holdout evaluation of the two-predictor model; no factor conversion is
# needed because the model uses only numeric columns
allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(pred = predict(lmSnowTwo, newdata = .)) %>%
    summarize(meModel = mean((pred - snowfall)^2),
              meBase = mean((snowfall - mean(snowfall))^2),
              r2 = 1 - meModel / meBase,
              rmse = sqrt(meModel))
## # A tibble: 1 × 4
##       meModel  meBase    r2     rmse
##         <dbl>   <dbl> <dbl>    <dbl>
## 1 0.000000389 0.00193  1.00 0.000624

As well, since ‘weathercode’ indicates whether it is snowing, the combination with precipitation has reasonable predictive power on snowfall:

# Interaction-only model: a separate precipitation slope for each weather
# code, with no main effects. Codes 0-3 show zero precipitation in the data
# (see the summary table further below), so their interaction columns are
# all-zero and lm() reports them as NA ("4 not defined because of
# singularities" in the output).
lmSnowWCP <- lm(snowfall ~ precipitation:weathercode, 
                data=allCity %>% 
                    mutate(weathercode=factor(weathercode)) %>%
                    filter(tt=="train", year<2022) %>% 
                    select(all_of(varsTrain)) %>% 
                    select(-diffuse_radiation)
                )
summary(lmSnowWCP)
## 
## Call:
## lm(formula = snowfall ~ precipitation:weathercode, data = allCity %>% 
##     mutate(weathercode = factor(weathercode)) %>% filter(tt == 
##     "train", year < 2022) %>% select(all_of(varsTrain)) %>% select(-diffuse_radiation))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.26512 -0.00083 -0.00083 -0.00083  0.32684 
## 
## Coefficients: (4 not defined because of singularities)
##                               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                  8.258e-04  2.696e-05   30.631  < 2e-16 ***
## precipitation:weathercode0          NA         NA       NA       NA    
## precipitation:weathercode1          NA         NA       NA       NA    
## precipitation:weathercode2          NA         NA       NA       NA    
## precipitation:weathercode3          NA         NA       NA       NA    
## precipitation:weathercode51 -3.225e-03  5.057e-04   -6.378  1.8e-10 ***
## precipitation:weathercode53 -1.194e-03  2.856e-04   -4.181  2.9e-05 ***
## precipitation:weathercode55 -7.506e-04  3.110e-04   -2.413 0.015802 *  
## precipitation:weathercode61 -4.578e-04  1.348e-04   -3.396 0.000683 ***
## precipitation:weathercode63 -1.939e-04  7.118e-05   -2.724 0.006442 ** 
## precipitation:weathercode65 -6.735e-05  7.805e-05   -0.863 0.388240    
## precipitation:weathercode71  1.356e-01  8.931e-04  151.859  < 2e-16 ***
## precipitation:weathercode73  4.668e-01  5.246e-04  889.991  < 2e-16 ***
## precipitation:weathercode75  6.348e-01  3.643e-04 1742.748  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01562 on 368100 degrees of freedom
## Multiple R-squared:  0.9127, Adjusted R-squared:  0.9127 
## F-statistic: 4.275e+05 on 9 and 368100 DF,  p-value: < 2.2e-16
# Holdout evaluation of the interaction model; predict() warns about the
# rank-deficient fit because of the NA coefficients for codes 0-3
allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)) %>%
    mutate(pred = predict(lmSnowWCP, newdata = .)) %>%
    summarize(meModel = mean((pred - snowfall)^2),
              meBase = mean((snowfall - mean(snowfall))^2),
              r2 = 1 - meModel / meBase,
              rmse = sqrt(meModel))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `pred = predict(lmSnowWCP, newdata = .)`.
## Caused by warning in `predict.lm()`:
## ! prediction from a rank-deficient fit may be misleading
## # A tibble: 1 × 4
##    meModel  meBase    r2   rmse
##      <dbl>   <dbl> <dbl>  <dbl>
## 1 0.000119 0.00193 0.938 0.0109
# Actual vs. predicted snowfall for the interaction model. autoRound()
# (project helper) bins every numeric column to 0.01 so repeated
# (pred, snowfall) pairs can be counted; point size encodes that count.
# predict() repeats the rank-deficient-fit warning seen above; the red
# dashed line marks perfect prediction.
allCity %>% 
    filter(tt=="test", year==2022) %>%
    mutate(weathercode=factor(weathercode)) %>%
    mutate(pred=predict(lmSnowWCP, newdata=.)) %>%
    mutate(across(.cols=where(is.numeric), .fns=function(x) autoRound(x, rndTo=0.01))) %>%
    count(snowfall, pred) %>%
    ggplot(aes(x=pred, y=snowfall)) + 
    geom_point(aes(size=n)) + 
    labs(title="Actual vs. Predicted Snowfall\n(linear model with precipitation and weather code)") + 
    geom_smooth(method="lm", aes(weight=n)) + 
    geom_abline(slope=1, intercept=0, lty=2, color="red")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `pred = predict(lmSnowWCP, newdata = .)`.
## Caused by warning in `predict.lm()`:
## ! prediction from a rank-deficient fit may be misleading
## `geom_smooth()` using formula = 'y ~ x'

The relationship between weathercode and precipitation/snow is explored:

# Mean precipitation (mm) and snowfall (cm) per weather code, shown as
# labelled bars in side-by-side facets
wcMeans <- allCity %>%
    mutate(weathercode = factor(weathercode)) %>%
    group_by(weathercode) %>%
    summarize(across(.cols = c("precipitation", "snowfall"), .fns = mean)) %>%
    pivot_longer(cols = -weathercode)
ggplot(wcMeans, aes(x = weathercode, y = value)) +
    geom_col(fill = "lightblue") +
    geom_text(aes(y = value / 2, label = round(value, 1)), size = 2.5) +
    facet_wrap(~name, scales = "free_y") +
    labs(title="Average precipitation (mm) and snow (cm) by weathercode", 
         x=NULL, 
         y="Precip (mm) or Snowfall (cm)"
         )

# Share of each precipitating hour's precipitation that fell as snow, by
# weather code; snowfall (cm) is scaled by 0.7 * precipitation (mm), the
# conversion implied by the fitted two-predictor model
allCity %>%
    filter(precipitation > 0) %>%
    mutate(weathercode = factor(weathercode)) %>%
    mutate(pctSnow = snowfall / (0.7 * precipitation)) %>%
    ggplot(aes(x = weathercode, y = pctSnow)) +
    geom_boxplot(fill = "lightblue") +
    labs(title="Percent of precipitation as snowfall by weathercode", 
         x=NULL, 
         y="Snowfall (cm) divided by\n(0.7 * precipitation (mm))"
         )

While rain is sometimes falling during snow events, in general the precipitation falls mainly or entirely as snow during weathercode 71, 73, and 75. There is no snowfall under other weathercodes

A model is run to predict weathercode, at first allowing precipitation, rain, and snowfall as predictors:

# Random forest classifying weathercode (factor target, so isContVar=FALSE
# and accuracy is reported). All training variables except weathercode
# itself are allowed, including precipitation, rain, and snowfall.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfWCFull <- runFullRF(dfTrain=allCity %>% 
                          filter(tt=="train", year<2022) %>% 
                          mutate(weathercode=factor(weathercode)), 
                      yVar="weathercode", 
                      xVars=c(varsTrain[!str_detect(varsTrain, "^weathercode$")]), # anchored: drop only the exact name
                      dfTest=allCity %>% 
                          filter(tt=="test", year==2022) %>% 
                          mutate(weathercode=factor(weathercode)), 
                      useLabel=keyLabel, 
                      useSub=stringr::str_to_sentence(keyLabel), 
                      isContVar=FALSE,
                      rndTo=-1L,
                      refXY=TRUE,
                      returnData=TRUE
                      )
## Growing trees.. Progress: 34%. Estimated remaining time: 1 minute, 0 seconds.
## Growing trees.. Progress: 67%. Estimated remaining time: 29 seconds.

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.634%

Accuracy is extremely high, with weathercode being strongly linked to cloud cover, precipitation rate, and percentage of precipitation falling as snow

Relationships between precipitation, rain, snow, clouds, and weathercode are further explored:

# Precipitation type by weather code: hours are classified as snow-only
# (SN), rain-only (RA), both (SNRA), or None, then counted per weathercode
# and shown as a tile plot. The per-group means musn/mura/mucc are computed
# here for reference but only the count n is plotted.
allCity %>% 
    mutate(isSnow=snowfall>0, isRain=rain>0) %>% 
    group_by(isSnow, isRain, weathercode) %>% 
    summarize(n=n(), 
              across(.cols=c("rain", "snowfall", "precipitation", "cloudcover"), .fns=function(x) sum(x)), 
              .groups="drop") %>% 
    mutate(pType=case_when(isSnow & isRain~"SNRA", isSnow & !isRain~"SN", !isSnow & isRain~"RA", TRUE~"None"),
           musn=snowfall/n, # mean snowfall per hour in the cell
           mura=rain/n, # mean rain per hour in the cell
           mucc=cloudcover/n) %>% # mean cloud cover in the cell
    ggplot(aes(y=pType, x=factor(weathercode))) + 
    geom_tile(aes(fill=n)) + 
    scale_fill_continuous(low="white", high="lightgreen") + 
    geom_text(aes(label=n), size=2.5) + 
    labs(title="Precipitation types by weather code", x="Weather code", y="Precipitation type")

# The three per-weather-code boxplots differ only in the y variable and the
# labels, so a small helper removes the triplicated ggplot chain.
# yvar: name of the allCity column to plot (string, accessed via the .data
# pronoun); title/ylab: plot title and y-axis label. The ggplot object is
# returned and auto-printed at top level, exactly as the inline chains were.
wcBoxplot <- function(yvar, title, ylab) {
    allCity %>% 
        ggplot(aes(x=factor(weathercode), y=.data[[yvar]])) + 
        geom_boxplot(fill="lightblue") + 
        labs(title=title, x="Weather code", y=ylab)
}

# Cloud cover by weather code
wcBoxplot("cloudcover", "Cloud cover by weather code", "Cloud cover (%)")

# Rain by weather code
wcBoxplot("rain", "Rain by weather code", "Rain (mm)")

# Snow by weather code
wcBoxplot("snowfall", "Snow by weather code", "Snow (cm)")

# Table of results: one row per (isSnow, isRain, weathercode) cell with the
# hour count, min/mean/max cloud cover, and mean precipitation/snow/rain.
# The across() sums share names with the raw columns; dividing by n turns
# them into means, after which the summed columns are dropped.
allCity %>% 
    mutate(isSnow=snowfall>0, isRain=rain>0) %>% 
    group_by(isSnow, isRain, weathercode) %>% 
    summarize(n=n(), 
              maxcc=max(cloudcover), 
              mincc=min(cloudcover),
              across(.cols=c("rain", "snowfall", "precipitation", "cloudcover"), .fns=function(x) sum(x)),
              .groups="drop") %>% 
    mutate(pType=case_when(isSnow & isRain~"SNRA", isSnow & !isRain~"SN", !isSnow & isRain~"RA", TRUE~"None"),
           muprecip=precipitation/n, 
           musn=snowfall/n, 
           mura=rain/n, 
           mucc=cloudcover/n
           ) %>%
    select(-rain, -snowfall, -precipitation, -cloudcover) %>%
    select(weathercode, pType, isSnow, isRain, n, mincc, mucc, maxcc, everything())
## # A tibble: 16 × 11
##    weathercode pType isSnow isRain      n mincc  mucc maxcc muprecip   musn
##          <int> <chr> <lgl>  <lgl>   <int> <int> <dbl> <int>    <dbl>  <dbl>
##  1           0 None  FALSE  FALSE  296257     0  3.61    20   0      0     
##  2           1 None  FALSE  FALSE  126163    20 31.8     50   0      0     
##  3           2 None  FALSE  FALSE   48304    50 63.7     80   0      0     
##  4           3 None  FALSE  FALSE   72391    80 94.2    100   0      0     
##  5          51 RA    FALSE  TRUE    32254     0 77.1    100   0.200  0     
##  6          53 RA    FALSE  TRUE    10815     0 86.2    100   0.663  0     
##  7          55 RA    FALSE  TRUE     3461     1 87.7    100   1.09   0     
##  8          61 RA    FALSE  TRUE     7011     1 89.9    100   1.74   0     
##  9          63 RA    FALSE  TRUE     4947     4 91.0    100   3.91   0     
## 10          65 RA    FALSE  TRUE      551    20 93.0    100  11.0    0     
## 11          71 SN    TRUE   FALSE    3192     0 93.4    100   0.0997 0.0906
## 12          73 SN    TRUE   FALSE    1800     0 97.2    100   0.525  0.368 
## 13          75 SN    TRUE   FALSE     378    61 98.7    100   1.80   1.26  
## 14          71 SNRA  TRUE   TRUE      521    21 95.0    100   0.610  0.0936
## 15          73 SNRA  TRUE   TRUE      553    29 96.9    100   1.03   0.396 
## 16          75 SNRA  TRUE   TRUE      186    64 99.2    100   2.47   1.40  
## # ℹ 1 more variable: mura <dbl>

Weather codes appear to be defined as:

Not surprisingly, the random forest is effective at pulling apart very clean data splits like these

A model is run to predict weathercode, using only cloud cover, precipitation, rain, and snowfall as predictors, and with random forest defaults (mtry=2 for 4 predictors):

# Weathercode forest restricted to the four variables identified above,
# using the random-forest default mtry (2 candidate variables per split for
# 4 predictors). returnData=FALSE: only the accuracy printout is kept.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022) %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("cloudcover", "precipitation", "rain", "snowfall"), 
          dfTest=allCity %>% filter(tt=="test", year==2022) %>% mutate(weathercode=factor(weathercode)), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.436%

Accuracy remains over 99%, with main errors being cloudiness classification when there is zero precipitation

The model is updated using mtry=4 and mtry=1:

# Same four-variable weathercode forest with mtry=1: each split considers a
# single randomly chosen candidate variable
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022) %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("cloudcover", "precipitation", "rain", "snowfall"), 
          dfTest=allCity %>% filter(tt=="test", year==2022) %>% mutate(weathercode=factor(weathercode)), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          rndTo=-1L,
          mtry=1, # one candidate variable per split
          refXY=TRUE,
          returnData=FALSE
          )

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%

# Same four-variable weathercode forest with mtry=4: all four variables are
# candidates at every split
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022) %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("cloudcover", "precipitation", "rain", "snowfall"), 
          dfTest=allCity %>% filter(tt=="test", year==2022) %>% mutate(weathercode=factor(weathercode)), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          rndTo=-1L,
          mtry=4, # every variable considered at every split
          refXY=TRUE,
          returnData=FALSE
          )

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.429%

The model performs with much lower accuracy for mtry=1 rather than mtry=2, but accuracy is essentially identical (over 99%) for mtry=2 and mtry=4. The main issue with mtry=1 is inability to classify non-precipitation days since snowfall, rain, and precipitation as stand-alones do not distinguish degree of cloudiness

All combinations of two variables are explored for predicting weathercode on a smaller training dataset:

# Train and test data created previously (dfTrainCloud and dfTestCloud)
# Candidate predictors: every training variable except weathercode, plus
# derived month and time-of-day columns. NOTE(review): unlike the snowfall
# model, this pattern is unanchored, so any variable whose name merely
# contains "weathercode" would also be dropped -- intended here, since only
# the exact column exists in varsTrain as far as this file shows.
possWCVars <- c(varsTrain[!str_detect(varsTrain, "weathercode")], "month", "tod")

# Reproducible 5,000-row subsample of the small training data.
# seq_len(nrow(...)) replaces 1:nrow(...), which misbehaves on empty data;
# the sampled indices are unchanged for the same seed.
set.seed(24083015)
idxSmallWC <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)
# Score every two-variable combination for predicting weathercode on the
# 5,000-row subsample, storing (idx1, idx2, holdout accuracy) one row per
# pair. The matrix is preallocated rather than grown with rbind() in the
# loop, and combn() enumerates the pairs in the same order as the original
# nested loops.
pairsSmallWC <- utils::combn(seq_along(possWCVars), 2)
mtxSmallWC <- matrix(NA_real_, nrow=ncol(pairsSmallWC), ncol=3)

for(k in seq_len(ncol(pairsSmallWC))) {
    idx1 <- pairsSmallWC[1, k]
    idx2 <- pairsSmallWC[2, k]
    # rfAcc is taken to be the scalar holdout accuracy when isContVar=FALSE
    # (the continuous case above used rfAcc$r2) -- TODO confirm in runFullRF
    accSmallWC <- runFullRF(dfTrain=dfTrainCloud[idxSmallWC,] %>% mutate(weathercode=factor(weathercode)), 
                            yVar="weathercode", 
                            xVars=possWCVars[c(idx1, idx2)], 
                            dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)), 
                            useLabel=keyLabel, 
                            useSub=stringr::str_to_sentence(keyLabel), 
                            isContVar=FALSE,
                            mtry=2,
                            makePlots=FALSE,
                            returnData=TRUE
                            )[["rfAcc"]]
    mtxSmallWC[k, ] <- c(idx1, idx2, accSmallWC)
}
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.568%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.022%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.878%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.515%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.758%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.325%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.652%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.048%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.097%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.721%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.917%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.047%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.304%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.176%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.146%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.328%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.553%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.088%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.271%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.479%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.352%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.936%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.382%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.4%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.033%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.375%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.106%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.95%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.563%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.512%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.226%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.905%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.753%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.473%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.451%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.448%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.996%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.081%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.944%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.572%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.483%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.813%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.351%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.138%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.958%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.645%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.661%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.595%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.065%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.784%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.832%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.975%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.464%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.051%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.358%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.586%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.084%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.867%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.023%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.178%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.369%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.767%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.89%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.732%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.397%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.041%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.103%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.708%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.16%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.197%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.181%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.471%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.77%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.732%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.139%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.734%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.959%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.907%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.761%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.339%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.913%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.1%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.161%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.897%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.542%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.349%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.45%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.871%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.551%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.57%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.639%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.042%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.509%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.532%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.313%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.107%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.197%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.677%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.397%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.963%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.786%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.242%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.954%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.037%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.737%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.401%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.341%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.095%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.851%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.465%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.833%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.663%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.723%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.221%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.296%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.172%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.449%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.771%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.447%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.729%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.79%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.333%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.15%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.995%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.354%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.08%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.651%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.798%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.214%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.103%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.379%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.232%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.749%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.494%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.567%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.318%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.987%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.318%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.458%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.006%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.366%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.462%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.993%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.579%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.091%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.884%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.209%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.663%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.428%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.941%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.657%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.975%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.792%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.848%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.154%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.284%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.532%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.426%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.574%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.049%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.211%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.999%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.313%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.382%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.599%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.674%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.097%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.048%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.542%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.047%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.171%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.994%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.503%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.78%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.202%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.906%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.671%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.09%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.004%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.495%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.045%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.36%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.602%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.337%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.208%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.621%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.244%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.864%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.685%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.114%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.43%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.222%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.884%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.306%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.635%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.49%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.512%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.365%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.824%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.408%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.599%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.451%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.764%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.965%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.872%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.68%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.214%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.212%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.383%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.012%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.008%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.222%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.125%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.049%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.133%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.216%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.57%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.915%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.047%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.572%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.555%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.387%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.123%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.411%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.452%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.307%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.42%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.975%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.955%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.213%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.229%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.187%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.917%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.263%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.242%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.879%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.998%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.285%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.17%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.72%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.892%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.638%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.678%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.319%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.966%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.893%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.905%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.78%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.679%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.291%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.09%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.371%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.452%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.884%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.858%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.95%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.071%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.132%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.213%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.301%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.199%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.85%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.166%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.358%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.166%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.308%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.695%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.344%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.303%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.156%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.6%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.081%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.477%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.075%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.151%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.585%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.71%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.809%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.691%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.767%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.092%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.409%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.238%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.912%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.216%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.238%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.573%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.318%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.908%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.567%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.731%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.995%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.008%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.487%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.288%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.995%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.1%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.399%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.421%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.512%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.567%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.254%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.442%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.638%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.775%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.742%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.252%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.573%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.054%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.48%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.417%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.762%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.333%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.311%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.76%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.851%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.272%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.65%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.391%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.566%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.741%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.427%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.432%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.709%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.993%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.642%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.909%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.521%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.442%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.879%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.741%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.741%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.642%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.589%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.402%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.59%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.515%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.04%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.095%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.451%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.017%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.789%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.514%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.922%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.539%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.181%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.58%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.165%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.032%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.173%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.63%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.323%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.192%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.712%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.859%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.597%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.278%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.469%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.204%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.391%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.589%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.701%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.67%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.926%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.965%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.265%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.546%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.609%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.058%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.426%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.079%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.427%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.747%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.596%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.799%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.508%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.363%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.462%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.929%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.142%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.849%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.214%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.38%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.534%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.575%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.935%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.405%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.809%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.593%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.978%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.658%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.534%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.625%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.882%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.748%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.06%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.478%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.694%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.067%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.524%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.767%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.958%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.122%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.553%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.661%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.313%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.837%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.686%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.919%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.14%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.485%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.737%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.59%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.008%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.749%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.76%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.459%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.592%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.991%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.549%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.012%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.84%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.425%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.699%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.212%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.844%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.271%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.427%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.718%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.215%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.546%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.774%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.773%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.892%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.638%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.524%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.664%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.409%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.805%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.446%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.951%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.979%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.647%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.453%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.339%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.593%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.217%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.36%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.179%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.693%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.272%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.132%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.774%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.756%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.855%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.228%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.353%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.156%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.884%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.975%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.035%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.132%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.503%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.386%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.495%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.266%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.479%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.858%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.35%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.214%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.803%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.175%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.798%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.375%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.703%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.396%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.245%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.659%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.601%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.411%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.774%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.889%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.327%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.847%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.2%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.131%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.693%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.649%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.924%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.979%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.177%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.436%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.895%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.019%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.944%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.557%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.71%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.899%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.787%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.963%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.333%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.468%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.663%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.244%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.939%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.421%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.946%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.733%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.947%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.568%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.855%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.214%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.145%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.64%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.145%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.472%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.252%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.896%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.272%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.297%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.003%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.175%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.08%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.692%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.52%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.566%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.649%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.663%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.556%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.02%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.843%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.462%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.29%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.691%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.66%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.404%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.812%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.908%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.444%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.002%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.269%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.733%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.564%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.266%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.305%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.183%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.557%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.036%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.516%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.748%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.235%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.076%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.355%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.705%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.041%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.42%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.305%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.419%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.015%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.358%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.551%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.211%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.746%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.261%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.683%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.474%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.254%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.835%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.192%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.924%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.244%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.249%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.049%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.468%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.391%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.718%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.891%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.139%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.764%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.312%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.981%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.905%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.465%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.481%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.58%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.128%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.258%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.384%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.323%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.024%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.915%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.166%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.681%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.34%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.034%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.538%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.12%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.681%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.161%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.216%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.119%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.921%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.221%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.06%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.546%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.617%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.382%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.465%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.412%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.864%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.766%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.409%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.708%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.926%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.838%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.405%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.767%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.298%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.763%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.764%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.881%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.237%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.931%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.938%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.582%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.57%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.8%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.658%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.133%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.622%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.056%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.094%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.094%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.069%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.483%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.612%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.68%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.034%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.917%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.579%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.418%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.696%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.885%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.491%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.057%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.462%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.023%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.136%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.37%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.988%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.587%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.337%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.085%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.928%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.451%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.262%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.58%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.451%

Accuracy by pairs of metrics is explored:

# Convert the per-pair accuracy matrix into a labeled tibble: idx1/idx2 are
# positions into possWCVars, r2 is the holdout accuracy for that pair
dfSmallR2WC <- mtxSmallWC %>%
    as.data.frame() %>%
    stats::setNames(c("idx1", "idx2", "r2")) %>%
    tibble::as_tibble() %>%
    mutate(var1 = possWCVars[idx1],
           var2 = possWCVars[idx2],
           rn = row_number())

# Show the 20 best-performing predictor pairs
dfSmallR2WC %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n = 20)
## # A tibble: 666 × 3
##    var1                 var2                          r2
##    <chr>                <chr>                      <dbl>
##  1 rain                 cloudcover                 0.983
##  2 precipitation        cloudcover                 0.982
##  3 snowfall             cloudcover                 0.896
##  4 cloudcover           tod                        0.886
##  5 cloudcover           direct_normal_irradiance   0.879
##  6 cloudcover           direct_radiation           0.878
##  7 cloudcover           et0_fao_evapotranspiration 0.874
##  8 cloudcover           cloudcover_low             0.874
##  9 cloudcover           month                      0.874
## 10 cloudcover           cloudcover_high            0.873
## 11 cloudcover           shortwave_radiation        0.873
## 12 cloudcover           diffuse_radiation          0.873
## 13 hour                 cloudcover                 0.871
## 14 relativehumidity_2m  cloudcover                 0.869
## 15 cloudcover           cloudcover_mid             0.868
## 16 cloudcover           vapor_pressure_deficit     0.867
## 17 cloudcover           soil_moisture_0_to_7cm     0.864
## 18 cloudcover           windgusts_10m              0.864
## 19 temperature_2m       cloudcover                 0.861
## 20 apparent_temperature cloudcover                 0.860
## # ℹ 646 more rows
# For each variable, summarize the min/mean/max accuracy across every pair
# that includes it, then plot the range as a dot with error bars
dfSmallR2WC %>%
    pivot_longer(cols = c(var1, var2)) %>%
    group_by(value) %>%
    summarize(r2_min = min(r2), r2_mu = mean(r2), r2_max = max(r2)) %>%
    ggplot(aes(x = fct_reorder(value, r2_mu))) +
    geom_point(aes(y = r2_mu)) +
    geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
    geom_hline(yintercept = 1, lty = 2, color = "red") +
    lims(y = c(NA, 1)) +
    coord_flip() +
    labs(title = "Accuracy in every 2-predictor model including self and one other",
         subtitle = "Predicting weathercode",
         y = "Range of accuracy (min-mean-max)",
         x = NULL)

# Top pairs after dropping anything involving a cloudcover metric
dfSmallR2WC %>%
    filter(!str_detect(var1, "cloudcover") & !str_detect(var2, "cloudcover")) %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n = 20)
## # A tibble: 528 × 3
##    var1                var2                          r2
##    <chr>               <chr>                      <dbl>
##  1 precipitation       snowfall                   0.622
##  2 precipitation       rain                       0.622
##  3 rain                snowfall                   0.622
##  4 relativehumidity_2m rain                       0.617
##  5 rain                diffuse_radiation          0.617
##  6 rain                year                       0.612
##  7 rain                tod                        0.612
##  8 rain                month                      0.612
##  9 precipitation       diffuse_radiation          0.612
## 10 relativehumidity_2m precipitation              0.611
## 11 precipitation       tod                        0.611
## 12 precipitation       month                      0.611
## 13 hour                rain                       0.610
## 14 precipitation       year                       0.609
## 15 hour                precipitation              0.607
## 16 rain                et0_fao_evapotranspiration 0.605
## 17 rain                windgusts_10m              0.601
## 18 precipitation       et0_fao_evapotranspiration 0.600
## 19 precipitation       windgusts_10m              0.593
## 20 rain                vapor_pressure_deficit     0.591
## # ℹ 508 more rows
# Repeat the min/mean/max range plot after removing pairs that involve
# precipitation or any cloudcover metric
dfSmallR2WC %>%
    filter(var1 != "precipitation",
           var2 != "precipitation",
           !str_detect(var1, "cloudcover"),
           !str_detect(var2, "cloudcover")) %>%
    pivot_longer(cols = c(var1, var2)) %>%
    group_by(value) %>%
    summarize(r2_min = min(r2), r2_mu = mean(r2), r2_max = max(r2)) %>%
    ggplot(aes(x = fct_reorder(value, r2_mu))) +
    geom_point(aes(y = r2_mu)) +
    geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
    geom_hline(yintercept = 1, lty = 2, color = "red") +
    lims(y = c(NA, 1)) +
    coord_flip() +
    labs(title = "Accuracy in every 2-predictor model including self and one other",
         subtitle = "Predicting weathercode (excluding variable paired with 'precipitation' or 'cloudcover')",
         y = "Range of accuracy (min-mean-max)",
         x = NULL)

# Top pairs after also removing every rain/snow/precipitation variable
dfSmallR2WC %>%
    filter(!str_detect(var1, "rain|snow|precip"),
           !str_detect(var2, "rain|snow|precip"),
           !str_detect(var1, "cloudcover"),
           !str_detect(var2, "cloudcover")) %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n = 20)
## # A tibble: 435 × 3
##    var1                       var2                          r2
##    <chr>                      <chr>                      <dbl>
##  1 shortwave_radiation        direct_normal_irradiance   0.541
##  2 direct_normal_irradiance   diffuse_radiation          0.541
##  3 shortwave_radiation        direct_radiation           0.539
##  4 direct_radiation           diffuse_radiation          0.538
##  5 direct_radiation           direct_normal_irradiance   0.535
##  6 shortwave_radiation        diffuse_radiation          0.535
##  7 relativehumidity_2m        tod                        0.520
##  8 relativehumidity_2m        diffuse_radiation          0.515
##  9 hour                       tod                        0.515
## 10 year                       tod                        0.515
## 11 month                      tod                        0.515
## 12 diffuse_radiation          et0_fao_evapotranspiration 0.512
## 13 hour                       direct_normal_irradiance   0.511
## 14 diffuse_radiation          tod                        0.510
## 15 diffuse_radiation          vapor_pressure_deficit     0.507
## 16 direct_normal_irradiance   et0_fao_evapotranspiration 0.502
## 17 et0_fao_evapotranspiration tod                        0.500
## 18 year                       month                      0.499
## 19 hour                       year                       0.499
## 20 relativehumidity_2m        direct_normal_irradiance   0.499
## # ℹ 415 more rows
# Null accuracy would pick the most frequent observation
allCity %>%
    count(weathercode) %>%
    arrange(desc(n)) %>%
    mutate(pct = n / sum(n))
## # A tibble: 13 × 3
##    weathercode      n      pct
##          <int>  <int>    <dbl>
##  1           0 296257 0.487   
##  2           1 126163 0.207   
##  3           3  72391 0.119   
##  4           2  48304 0.0793  
##  5          51  32254 0.0530  
##  6          53  10815 0.0178  
##  7          61   7011 0.0115  
##  8          63   4947 0.00813 
##  9          71   3713 0.00610 
## 10          55   3461 0.00569 
## 11          73   2353 0.00387 
## 12          75    564 0.000926
## 13          65    551 0.000905

Cloud cover and precipitation are highly predictive of weathercode, with most other variables having little explanatory power (accuracy near or even below the ~50% baseline for predicting everything as weathercode 0)

Select combinations are explored using the full training dataset:

# Candidate predictors for the full-training-data weathercode models
possLargeWC <- c("precipitation", "rain", "snowfall", "cloudcover")
possLargeWC
## [1] "precipitation" "rain"          "snowfall"      "cloudcover"
# Holdout accuracy for every unordered pair of the candidate predictors.
# combn() enumerates (idx1 < idx2) pairs in the same order as the original
# nested loop, and the result matrix is preallocated instead of being grown
# with rbind() on every iteration.
pairIdxWC <- combn(seq_along(possLargeWC), 2)
mtxLargeWC <- matrix(nrow=ncol(pairIdxWC), ncol=3)

for(j in seq_len(ncol(pairIdxWC))) {
    idx1 <- pairIdxWC[1, j]
    idx2 <- pairIdxWC[2, j]
    # runFullRF prints the holdout accuracy as a side effect; "rfAcc" is the
    # accuracy value used to compare pairs
    r2LargeWC <- runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)), 
                           yVar="weathercode", 
                           xVars=possLargeWC[c(idx1, idx2)], 
                           dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                           useLabel=keyLabel, 
                           useSub=stringr::str_to_sentence(keyLabel), 
                           isContVar=FALSE,
                           mtry=2,
                           makePlots=FALSE,
                           returnData=TRUE
                           )[["rfAcc"]]
    mtxLargeWC[j, ] <- c(idx1, idx2, r2LargeWC)
}
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.259%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.423%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.431%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.626%
# Label the full-training-data pair results and list them best-first
dfLargeR2WC <- mtxLargeWC %>%
    as.data.frame() %>%
    stats::setNames(c("idx1", "idx2", "r2")) %>%
    tibble::as_tibble() %>%
    mutate(var1 = possLargeWC[idx1],
           var2 = possLargeWC[idx2],
           rn = row_number())
dfLargeR2WC %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n = 20)
## # A tibble: 6 × 3
##   var1          var2          r2
##   <chr>         <chr>      <dbl>
## 1 rain          cloudcover 0.984
## 2 precipitation cloudcover 0.984
## 3 snowfall      cloudcover 0.896
## 4 precipitation snowfall   0.623
## 5 rain          snowfall   0.623
## 6 precipitation rain       0.623

Accuracy by type of weathercode is further explored for one subset (cloud cover and rain):

# Random forest on cloudcover + rain, then score the holdout predictions
# ("tstPred") by weathercode group rather than overall.
# Groups per the case_when labels: 0-3 no precipitation, 51-65 drizzle/rain,
# 71-75 snow -- presumably WMO weather codes; confirm against Open-Meteo docs.
runFullRF(dfTrain=dfTrainCloud[,] %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("cloudcover", "rain"), 
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
          )[["tstPred"]] %>%
    mutate(wcType=case_when(weathercode %in% c(0, 1, 2, 3)~"No Precip", 
                            weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only", 
                            weathercode %in% c(71, 73, 75)~"Snow", 
                            TRUE~"Other"
                            )
           ) %>% 
    # Share of holdout rows predicted exactly right, within each group
    group_by(wcType) %>%
    summarize(acc=mean(weathercode==pred))

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.431%

## # A tibble: 3 × 2
##   wcType      acc
##   <chr>     <dbl>
## 1 No Precip 0.994
## 2 Rain only 1    
## 3 Snow      0

The model with cloud cover and rain as predictors is very accurate at predicting weathercode, with the exception of zero accuracy during snowfall (71, 73, 75)

Accuracy by type of weathercode is further explored for another subset (cloud cover and snow):

# Random forest on cloudcover + snowfall; same per-weathercode-group
# accuracy breakdown as the cloudcover + rain model, to isolate which
# precipitation type each predictor set can distinguish
runFullRF(dfTrain=dfTrainCloud[,] %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("cloudcover", "snowfall"), 
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
          )[["tstPred"]] %>%
    mutate(wcType=case_when(weathercode %in% c(0, 1, 2, 3)~"No Precip", 
                            weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only", 
                            weathercode %in% c(71, 73, 75)~"Snow", 
                            TRUE~"Other"
                            )
           ) %>% 
    # Share of holdout rows predicted exactly right, within each group
    group_by(wcType) %>%
    summarize(acc=mean(weathercode==pred))

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.626%

## # A tibble: 3 × 2
##   wcType      acc
##   <chr>     <dbl>
## 1 No Precip 0.994
## 2 Rain only 0    
## 3 Snow      1

The model with cloud cover and snow as predictors is very accurate at predicting weathercode, with the exception of zero accuracy during rain events without snowfall (51, 53, 55, 61, 63, 65)

Accuracy by type of weathercode is further explored for a third subset (rain and snow):

# Random forest on rain + snowfall (no cloud information); same per-group
# accuracy breakdown, to show what happens without a cloud predictor
runFullRF(dfTrain=dfTrainCloud[,] %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("rain", "snowfall"), 
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
          )[["tstPred"]] %>%
    mutate(wcType=case_when(weathercode %in% c(0, 1, 2, 3)~"No Precip", 
                            weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only", 
                            weathercode %in% c(71, 73, 75)~"Snow", 
                            TRUE~"Other"
                            )
           ) %>% 
    # Share of holdout rows predicted exactly right, within each group
    group_by(wcType) %>%
    summarize(acc=mean(weathercode==pred))

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%

## # A tibble: 3 × 2
##   wcType      acc
##   <chr>     <dbl>
## 1 No Precip 0.577
## 2 Rain only 1    
## 3 Snow      1

The model with rain and snow as predictors is very accurate at predicting weathercode during precipitation events, but can do no better than predicting the null ‘0’ for no-precipitation observations (0, 1, 2, 3)

Accuracy by type of weathercode is explored for the three main predictors with mtry=1:

# Random forest on all three key predictors with mtry=1 (one candidate
# variable per split); same per-weathercode-group accuracy breakdown
runFullRF(dfTrain=dfTrainCloud[,] %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("rain", "snowfall", "cloudcover"), 
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          mtry=1,
          makePlots=TRUE,
          returnData=TRUE
          )[["tstPred"]] %>%
    mutate(wcType=case_when(weathercode %in% c(0, 1, 2, 3)~"No Precip", 
                            weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only", 
                            weathercode %in% c(71, 73, 75)~"Snow", 
                            TRUE~"Other"
                            )
           ) %>% 
    # Share of holdout rows predicted exactly right, within each group
    group_by(wcType) %>%
    summarize(acc=mean(weathercode==pred))

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.328%

## # A tibble: 3 × 2
##   wcType      acc
##   <chr>     <dbl>
## 1 No Precip 0.790
## 2 Rain only 0.800
## 3 Snow      0.992

The model with three predictors and mtry=1 drives ~79% accuracy, lowest during “no precip” (0.790) and “rain only” (0.800) events

Accuracy by type of weathercode is explored for the three main predictors with mtry=2:

# Random forest on all three key predictors with mtry=2; same
# per-weathercode-group accuracy breakdown for comparison against mtry=1
runFullRF(dfTrain=dfTrainCloud[,] %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("rain", "snowfall", "cloudcover"), 
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
          )[["tstPred"]] %>%
    mutate(wcType=case_when(weathercode %in% c(0, 1, 2, 3)~"No Precip", 
                            weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only", 
                            weathercode %in% c(71, 73, 75)~"Snow", 
                            TRUE~"Other"
                            )
           ) %>% 
    # Share of holdout rows predicted exactly right, within each group
    group_by(wcType) %>%
    summarize(acc=mean(weathercode==pred))

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.429%

## # A tibble: 3 × 2
##   wcType      acc
##   <chr>     <dbl>
## 1 No Precip 0.994
## 2 Rain only 1    
## 3 Snow      0.992

The model with three predictors and mtry=2 drives ~99% accuracy, strong during all three event types

Accuracy by type of weathercode is explored for the three main predictors with mtry=3:

# Random forest on all three key predictors with mtry=3 (all variables
# considered at every split); same per-weathercode-group accuracy breakdown
runFullRF(dfTrain=dfTrainCloud[,] %>% mutate(weathercode=factor(weathercode)), 
          yVar="weathercode", 
          xVars=c("rain", "snowfall", "cloudcover"), 
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          mtry=3,
          makePlots=TRUE,
          returnData=TRUE
          )[["tstPred"]] %>%
    mutate(wcType=case_when(weathercode %in% c(0, 1, 2, 3)~"No Precip", 
                            weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only", 
                            weathercode %in% c(71, 73, 75)~"Snow", 
                            TRUE~"Other"
                            )
           ) %>% 
    # Share of holdout rows predicted exactly right, within each group
    group_by(wcType) %>%
    summarize(acc=mean(weathercode==pred))

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.429%

## # A tibble: 3 × 2
##   wcType      acc
##   <chr>     <dbl>
## 1 No Precip 0.994
## 2 Rain only 1    
## 3 Snow      0.992

The model with three predictors and mtry=3 drives the same accuracy as the model with three predictors and mtry=2

A model is run to predict ground-level wind speed, at first allowing high-level wind speed as a predictor:

# Label string interpolated into the printed accuracy/R-squared messages
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Full random forest for ground-level wind speed using every training
# variable except the target itself. rndTo/refXY are runFullRF options --
# presumably rounding for the plot and a reference x=y line; confirm against
# the function definition in the earlier section
rfWindFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                      yVar="windspeed_10m", 
                      xVars=c(varsTrain[!str_detect(varsTrain, "^windspeed_10m$")]), 
                      dfTest=allCity %>% filter(tt=="test", year==2022), 
                      useLabel=keyLabel, 
                      useSub=stringr::str_to_sentence(keyLabel), 
                      isContVar=TRUE,
                      rndTo=-1L,
                      refXY=TRUE,
                      returnData=TRUE
                      )
## Growing trees.. Progress: 14%. Estimated remaining time: 3 minutes, 4 seconds.
## Growing trees.. Progress: 30%. Estimated remaining time: 2 minutes, 29 seconds.
## Growing trees.. Progress: 42%. Estimated remaining time: 2 minutes, 7 seconds.
## Growing trees.. Progress: 57%. Estimated remaining time: 1 minute, 35 seconds.
## Growing trees.. Progress: 72%. Estimated remaining time: 1 minute, 1 seconds.
## Growing trees.. Progress: 86%. Estimated remaining time: 30 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.23% (RMSE 1.26 vs. 7.58 null)
## `geom_smooth()` using formula = 'y ~ x'

The model is effective (~97% R-squared) at predicting ground-level wind speed, primarily by leveraging high-level wind speed and ground-level wind gusts

Correlations between predictors and ground wind speed are assessed:

# Pearson correlation of each candidate predictor with ground wind speed.
# vapply() replaces sapply(): it guarantees a named numeric vector (sapply's
# return type silently changes on empty/degenerate input).
vapply(varsTrain, FUN=function(x) cor(allCity$windspeed_10m, allCity[[x]]), FUN.VALUE=numeric(1)) %>% 
    as.data.frame() %>% 
    rownames_to_column("var") %>% 
    tibble::as_tibble() %>% 
    purrr::set_names(c("var", "cor")) %>% 
    ggplot(aes(x=fct_reorder(var, cor), y=cor)) + 
    geom_col(fill="lightblue") + 
    # Only label bars with |cor| > 0.2 to keep the plot readable
    geom_text(data=~filter(., abs(cor)>0.2), aes(y=cor/2, label=round(cor, 2)), size=2.5) +
    coord_flip() + 
    labs(title="Correlation with ground wind speed (windspeed_10m)", 
         y="Correlation", 
         x=NULL
         ) + 
    lims(y=c(NA, 1))

# Bubble plot of rounded ground wind speed against its two strongest
# predictors; point size / smoother weight = number of observations per cell
allCity %>%
    select(windspeed_10m, windspeed_100m, windgusts_10m) %>%
    mutate(across(.cols = everything(), .fns = round),
           rn = row_number()) %>%
    pivot_longer(cols = -c(rn, windspeed_10m)) %>%
    count(windspeed_10m, name, value) %>%
    ggplot(aes(x = value, y = windspeed_10m)) +
    geom_point(aes(size = n), alpha = 0.5) +
    geom_smooth(aes(weight = n), method = "lm") +
    facet_wrap(~name) +
    labs(x = NULL, title = "Ground-level (10m) windspeed vs. two strong predictors")
## `geom_smooth()` using formula = 'y ~ x'

The linear model is run for ground wind speed, using all predictors:

# Eliminate diffuse radiation due to rank-deficiency
# Linear benchmark: regress ground wind speed on all remaining training
# variables (weathercode as a factor) over the pre-2022 training rows
lmWindFull <- lm(windspeed_10m ~ ., 
                 data=allCity %>% 
                     filter(tt=="train", year<2022) %>% 
                     mutate(weathercode=factor(weathercode)) %>%
                     select(all_of(varsTrain)) %>% 
                     select(-diffuse_radiation)
                 )
summary(lmWindFull)
## 
## Call:
## lm(formula = windspeed_10m ~ ., data = allCity %>% filter(tt == 
##     "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>% 
##     select(all_of(varsTrain)) %>% select(-diffuse_radiation))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4822 -0.9703 -0.0413  0.9710  9.3060 
## 
## Coefficients:
##                                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                   -1.291e+02  1.630e+00  -79.217  < 2e-16 ***
## hour                           1.240e-02  4.307e-04   28.797  < 2e-16 ***
## temperature_2m                 4.805e-01  4.035e-03  119.067  < 2e-16 ***
## relativehumidity_2m            6.994e-03  4.113e-04   17.003  < 2e-16 ***
## dewpoint_2m                    9.937e-02  1.359e-03   73.138  < 2e-16 ***
## apparent_temperature          -5.625e-01  3.330e-03 -168.930  < 2e-16 ***
## pressure_msl                   4.571e-02  6.078e-04   75.203  < 2e-16 ***
## surface_pressure              -1.783e-02  2.712e-04  -65.756  < 2e-16 ***
## precipitation                  3.449e+00  7.060e-01    4.884 1.04e-06 ***
## rain                          -3.465e+00  7.065e-01   -4.905 9.35e-07 ***
## snowfall                      -5.648e+00  1.023e+00   -5.520 3.39e-08 ***
## cloudcover                     7.392e-03  4.485e-04   16.482  < 2e-16 ***
## cloudcover_low                 1.615e-03  2.315e-04    6.976 3.05e-12 ***
## cloudcover_mid                -5.238e-03  1.728e-04  -30.320  < 2e-16 ***
## cloudcover_high               -2.226e-03  1.094e-04  -20.351  < 2e-16 ***
## shortwave_radiation            5.895e-03  6.310e-05   93.422  < 2e-16 ***
## direct_radiation              -5.542e-03  6.760e-05  -81.991  < 2e-16 ***
## direct_normal_irradiance       9.342e-04  2.526e-05   36.982  < 2e-16 ***
## windspeed_100m                 4.904e-01  6.476e-04  757.182  < 2e-16 ***
## winddirection_10m              5.565e-04  4.183e-05   13.303  < 2e-16 ***
## winddirection_100m            -1.441e-04  4.220e-05   -3.414  0.00064 ***
## windgusts_10m                  9.405e-02  5.358e-04  175.545  < 2e-16 ***
## et0_fao_evapotranspiration     3.908e+00  7.365e-02   53.065  < 2e-16 ***
## weathercode1                   2.258e-01  1.099e-02   20.556  < 2e-16 ***
## weathercode2                   2.755e-01  1.976e-02   13.943  < 2e-16 ***
## weathercode3                   3.874e-01  2.602e-02   14.888  < 2e-16 ***
## weathercode51                  4.848e-01  2.336e-02   20.758  < 2e-16 ***
## weathercode53                  1.523e-01  3.095e-02    4.922 8.56e-07 ***
## weathercode55                  7.405e-02  4.383e-02    1.689  0.09116 .  
## weathercode61                  6.336e-02  4.221e-02    1.501  0.13339    
## weathercode63                 -6.724e-02  6.944e-02   -0.968  0.33290    
## weathercode65                  5.492e-01  1.901e-01    2.889  0.00387 ** 
## weathercode71                  1.761e+00  4.692e-02   37.528  < 2e-16 ***
## weathercode73                  1.848e+00  6.804e-02   27.159  < 2e-16 ***
## weathercode75                  1.846e+00  1.898e-01    9.725  < 2e-16 ***
## vapor_pressure_deficit        -2.981e-01  8.028e-03  -37.135  < 2e-16 ***
## soil_temperature_0_to_7cm      9.211e-03  1.223e-03    7.529 5.12e-14 ***
## soil_temperature_7_to_28cm     1.158e-01  2.052e-03   56.439  < 2e-16 ***
## soil_temperature_28_to_100cm  -8.989e-03  2.277e-03   -3.948 7.87e-05 ***
## soil_temperature_100_to_255cm -3.123e-03  1.173e-03   -2.662  0.00776 ** 
## soil_moisture_0_to_7cm         1.261e+00  7.054e-02   17.881  < 2e-16 ***
## soil_moisture_7_to_28cm       -2.749e+00  1.003e-01  -27.406  < 2e-16 ***
## soil_moisture_28_to_100cm     -4.102e+00  7.529e-02  -54.480  < 2e-16 ***
## soil_moisture_100_to_255cm     6.598e+00  7.354e-02   89.720  < 2e-16 ***
## year                           4.777e-02  7.887e-04   60.564  < 2e-16 ***
## doy                            2.214e-04  3.163e-05    7.001 2.55e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.546 on 368064 degrees of freedom
## Multiple R-squared:  0.9493, Adjusted R-squared:  0.9493 
## F-statistic: 1.533e+05 on 45 and 368064 DF,  p-value: < 2.2e-16
# Holdout (2022 test rows) error metrics for the full linear model:
# mean squared error vs. the mean-only baseline, pseudo-R2, and RMSE
allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)) %>%
    mutate(pred = predict(lmWindFull, newdata = .)) %>%
    summarize(meModel = mean((pred - windspeed_10m)^2),
              meBase = mean((windspeed_10m - mean(windspeed_10m))^2),
              r2 = 1 - meModel / meBase,
              rmse = sqrt(meModel))
## # A tibble: 1 × 4
##   meModel meBase    r2  rmse
##     <dbl>  <dbl> <dbl> <dbl>
## 1    3.47   57.5 0.940  1.86
# Rank the full model's coefficients by absolute t-statistic
coef(summary(lmWindFull)) %>%
    as.data.frame() %>%
    rownames_to_column("Variable") %>%
    tibble::as_tibble() %>%
    arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
##    Variable                     Estimate `Std. Error` `t value` `Pr(>|t|)`
##    <chr>                           <dbl>        <dbl>     <dbl>      <dbl>
##  1 windspeed_100m                0.490      0.000648      757.           0
##  2 windgusts_10m                 0.0941     0.000536      176.           0
##  3 apparent_temperature         -0.562      0.00333      -169.           0
##  4 temperature_2m                0.480      0.00404       119.           0
##  5 shortwave_radiation           0.00589    0.0000631      93.4          0
##  6 soil_moisture_100_to_255cm    6.60       0.0735         89.7          0
##  7 direct_radiation             -0.00554    0.0000676     -82.0          0
##  8 (Intercept)                -129.         1.63          -79.2          0
##  9 pressure_msl                  0.0457     0.000608       75.2          0
## 10 dewpoint_2m                   0.0994     0.00136        73.1          0
## # ℹ 36 more rows

Even with many confounders, the linear model largely identifies that high-level wind-speed is a strong predictor for ground-level wind speed. Many other variables have statistically significant impact also, with wind gusts, apparent temperature, and actual temperature being of interest

The model is re-run using only the best four predictors:

# Best predictors only
# Reduced linear model: keep just the four highest-|t| predictors from the
# full model (windspeed_100m, windgusts_10m, apparent_temperature,
# temperature_2m) on the same pre-2022 training rows
lmWindFour <- lm(windspeed_10m ~ windspeed_100m + windgusts_10m + apparent_temperature + temperature_2m, 
                 data=allCity %>% 
                     filter(tt=="train", year<2022) %>% 
                     select(all_of(varsTrain)) %>% 
                     select(-diffuse_radiation)
                 )
summary(lmWindFour)
## 
## Call:
## lm(formula = windspeed_10m ~ windspeed_100m + windgusts_10m + 
##     apparent_temperature + temperature_2m, data = allCity %>% 
##     filter(tt == "train", year < 2022) %>% select(all_of(varsTrain)) %>% 
##     select(-diffuse_radiation))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.8387  -1.1335   0.0315   1.1358  10.4214 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.5525758  0.0093847  -165.4   <2e-16 ***
## windspeed_100m        0.4445631  0.0005079   875.4   <2e-16 ***
## windgusts_10m         0.1732658  0.0004583   378.1   <2e-16 ***
## apparent_temperature -0.1764342  0.0014401  -122.5   <2e-16 ***
## temperature_2m        0.2140689  0.0017273   123.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.863 on 368105 degrees of freedom
## Multiple R-squared:  0.9264, Adjusted R-squared:  0.9264 
## F-statistic: 1.159e+06 on 4 and 368105 DF,  p-value: < 2.2e-16
# Holdout (2022 test rows) error metrics for the four-predictor model,
# computed the same way as for the full model
allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(pred = predict(lmWindFour, newdata = .)) %>%
    summarize(meModel = mean((pred - windspeed_10m)^2),
              meBase = mean((windspeed_10m - mean(windspeed_10m))^2),
              r2 = 1 - meModel / meBase,
              rmse = sqrt(meModel))
## # A tibble: 1 × 4
##   meModel meBase    r2  rmse
##     <dbl>  <dbl> <dbl> <dbl>
## 1    5.01   57.5 0.913  2.24

The four-predictor model retains most, but not all, of the explanatory power of the full model. R2 on the test dataset falls from ~94% to ~91% (RMSE increases from ~1.9 to ~2.2)

Each of the best-four predictors is run individually, with metrics assessed on training data:

# Standalone training fit (sigma and R-squared) for each of the best four
# predictors. reformulate() builds the one-predictor formula safely --
# get(x) inside a formula resolves through the calling environment and is
# fragile -- and vapply() pins the return type to a length-2 numeric.
vapply(c("windspeed_100m", "windgusts_10m", "apparent_temperature", "temperature_2m"), 
       FUN=function(x) unlist(summary(lm(reformulate(x, response="windspeed_10m"), data=allCity))[c("sigma", "r.squared")]), 
       FUN.VALUE=numeric(2)
       ) %>%
    t()
##                         sigma  r.squared
## windspeed_100m       2.389386 0.88251896
## windgusts_10m        3.427561 0.75825046
## apparent_temperature 6.835019 0.03866518
## temperature_2m       6.916588 0.01558339

High-level wind speed and wind gusts have good standalone predictive power on ground level wind speed. Temperature and apparent temperature have essentially no standalone predictive power on ground level wind speed

Each variable is run through the random forest standalone, using a smaller training dataset:

# Variables to explore: every training column except the target itself,
# plus two additional engineered features
useWind <- c(varsTrain[!(varsTrain %in% "windspeed_10m")], "src", "todSeason")
useWind
##  [1] "hour"                          "temperature_2m"               
##  [3] "relativehumidity_2m"           "dewpoint_2m"                  
##  [5] "apparent_temperature"          "pressure_msl"                 
##  [7] "surface_pressure"              "precipitation"                
##  [9] "rain"                          "snowfall"                     
## [11] "cloudcover"                    "cloudcover_low"               
## [13] "cloudcover_mid"                "cloudcover_high"              
## [15] "shortwave_radiation"           "direct_radiation"             
## [17] "direct_normal_irradiance"      "diffuse_radiation"            
## [19] "windspeed_100m"                "winddirection_10m"            
## [21] "winddirection_100m"            "windgusts_10m"                
## [23] "et0_fao_evapotranspiration"    "weathercode"                  
## [25] "vapor_pressure_deficit"        "soil_temperature_0_to_7cm"    
## [27] "soil_temperature_7_to_28cm"    "soil_temperature_28_to_100cm" 
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"       
## [31] "soil_moisture_7_to_28cm"       "soil_moisture_28_to_100cm"    
## [33] "soil_moisture_100_to_255cm"    "year"                         
## [35] "doy"                           "src"                          
## [37] "todSeason"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
set.seed(24091314)
# seq_len(nrow(...)) avoids the 1:n footgun (1:0 yields c(1, 0) when the
# data frame is empty); the RNG stream is identical to sample(1:n, ...)
idxSmallWind <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)
# Accumulator for (idx1, idx2, r2) rows
mtxSmallWind <- matrix(nrow=0, ncol=3)

# Fit one standalone random forest per candidate predictor on the small
# training subset and stack the accuracy rows into a tibble; .id records
# the position in useWind, which is then mapped back to the variable name
fitOneVarRF <- function(oneVar) {
    rfAcc <- runFullRF(dfTrain=dfTrainCloud[idxSmallWind,], 
                       yVar="windspeed_10m", 
                       xVars=oneVar, 
                       dfTest=dfTestCloud, 
                       isContVar=TRUE,
                       makePlots=FALSE,
                       returnData=TRUE
                       )[["rfAcc"]]
    as_tibble(t(rfAcc))
}
rfWindOneSmall <- map_dfr(.x=useWind, .f=fitOneVarRF, .id="varNum") %>%
    mutate(varName=useWind[as.numeric(varNum)])
## 
## R-squared of test data is: -0.615% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of test data is: -5.015% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of test data is: -0.827% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of test data is: -7.341% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of test data is: -2.292% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of test data is: -4.177% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of test data is: 8.616% (RMSE 7.25 vs. 7.58 null)
## 
## R-squared of test data is: -0.975% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of test data is: -1.665% (RMSE 7.65 vs. 7.58 null)
## 
## R-squared of test data is: -1.356% (RMSE 7.64 vs. 7.58 null)
## 
## R-squared of test data is: 0.054% (RMSE 7.58 vs. 7.58 null)
## 
## R-squared of test data is: -1.597% (RMSE 7.64 vs. 7.58 null)
## 
## R-squared of test data is: -0.565% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of test data is: -2.852% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of test data is: -13.837% (RMSE 8.09 vs. 7.58 null)
## 
## R-squared of test data is: -11.645% (RMSE 8.01 vs. 7.58 null)
## 
## R-squared of test data is: -15.374% (RMSE 8.15 vs. 7.58 null)
## 
## R-squared of test data is: -5.057% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of test data is: 86.606% (RMSE 2.78 vs. 7.58 null)
## 
## R-squared of test data is: -0.053% (RMSE 7.59 vs. 7.58 null)
## 
## R-squared of test data is: -2.124% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of test data is: 73.62% (RMSE 3.9 vs. 7.58 null)
## 
## R-squared of test data is: 5.661% (RMSE 7.37 vs. 7.58 null)
## 
## R-squared of test data is: 1.555% (RMSE 7.53 vs. 7.58 null)
## 
## R-squared of test data is: -7.35% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of test data is: -6.337% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of test data is: -4.04% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of test data is: 1.29% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of test data is: -0.911% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of test data is: -5.933% (RMSE 7.81 vs. 7.58 null)
## 
## R-squared of test data is: -10.608% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of test data is: -12.566% (RMSE 8.05 vs. 7.58 null)
## 
## R-squared of test data is: 12.929% (RMSE 7.08 vs. 7.58 null)
## 
## R-squared of test data is: -0.394% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of test data is: -5.141% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of test data is: 17.812% (RMSE 6.88 vs. 7.58 null)
## 
## R-squared of test data is: 1.129% (RMSE 7.54 vs. 7.58 null)
rfWindOneSmall %>% arrange(desc(r2))
## # A tibble: 37 × 5
##    varNum mseNull msePred       r2 varName                     
##    <chr>    <dbl>   <dbl>    <dbl> <chr>                       
##  1 19        57.5    7.70 0.866    windspeed_100m              
##  2 22        57.5   15.2  0.736    windgusts_10m               
##  3 36        57.5   47.3  0.178    src                         
##  4 33        57.5   50.1  0.129    soil_moisture_100_to_255cm  
##  5 7         57.5   52.6  0.0862   surface_pressure            
##  6 23        57.5   54.3  0.0566   et0_fao_evapotranspiration  
##  7 24        57.5   56.6  0.0155   weathercode                 
##  8 28        57.5   56.8  0.0129   soil_temperature_28_to_100cm
##  9 37        57.5   56.9  0.0113   todSeason                   
## 10 11        57.5   57.5  0.000541 cloudcover                  
## # ℹ 27 more rows

The random forest model identifies the same standalone best predictors, and with very similar R².

A null model is run to predict ground-level wind speed, using only city, month, and day/night (mtry=3):

# Null random forest: predict ground-level wind speed from city, month, and
# day/night only; mtry=3 forces all three predictors at every split.
# Train on pre-2022 training rows, evaluate on the 2022 holdout rows.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfWindNull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                      yVar="windspeed_10m", 
                      xVars=c("src", "month", "tod"), 
                      dfTest=allCity %>% filter(tt=="test", year==2022), 
                      useLabel=keyLabel, 
                      useSub=stringr::str_to_sentence(keyLabel), 
                      mtry=3,
                      isContVar=TRUE,
                      rndTo=-1L,
                      refXY=TRUE,
                      returnData=TRUE
                      )
## Growing trees.. Progress: 74%. Estimated remaining time: 10 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.22% (RMSE 6.56 vs. 7.58 null)
## `geom_smooth()` using formula = 'y ~ x'

The null model is minimally effective (~25% R-squared) at predicting ground-level wind speed

The same null model is run using lm:

# Null predictors only
# Fit one mean per src x month x tod cell: the three-way interaction with
# the intercept suppressed (+ 0) yields directly interpretable cell means,
# mirroring the random forest null model above
lmWindNull <- lm(windspeed_10m ~ src:month:tod + 0, 
                 data=allCity %>% 
                     filter(tt=="train", year<2022)
                 )
summary(lmWindNull)
## 
## Call:
## lm(formula = windspeed_10m ~ src:month:tod + 0, data = allCity %>% 
##     filter(tt == "train", year < 2022))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.907  -3.906  -0.854   3.003  58.185 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## srcChicago:monthJan:todDay    17.6661     0.1059  166.80   <2e-16 ***
## srcHouston:monthJan:todDay    14.6756     0.1049  139.93   <2e-16 ***
## srcLA:monthJan:todDay          8.3357     0.1053   79.14   <2e-16 ***
## srcNYC:monthJan:todDay        12.9985     0.1051  123.70   <2e-16 ***
## srcVegas:monthJan:todDay       7.4405     0.1052   70.71   <2e-16 ***
## srcChicago:monthFeb:todDay    17.4299     0.1101  158.36   <2e-16 ***
## srcHouston:monthFeb:todDay    15.2828     0.1107  138.02   <2e-16 ***
## srcLA:monthFeb:todDay          8.1749     0.1102   74.19   <2e-16 ***
## srcNYC:monthFeb:todDay        12.5546     0.1106  113.54   <2e-16 ***
## srcVegas:monthFeb:todDay       9.6153     0.1108   86.80   <2e-16 ***
## srcChicago:monthMar:todDay    17.3240     0.1038  166.86   <2e-16 ***
## srcHouston:monthMar:todDay    15.3995     0.1056  145.90   <2e-16 ***
## srcLA:monthMar:todDay          8.2840     0.1048   79.02   <2e-16 ***
## srcNYC:monthMar:todDay        13.9386     0.1048  132.99   <2e-16 ***
## srcVegas:monthMar:todDay      11.1745     0.1040  107.50   <2e-16 ***
## srcChicago:monthApr:todDay    18.3074     0.1067  171.50   <2e-16 ***
## srcHouston:monthApr:todDay    16.1890     0.1073  150.83   <2e-16 ***
## srcLA:monthApr:todDay          9.0709     0.1065   85.20   <2e-16 ***
## srcNYC:monthApr:todDay        13.4615     0.1075  125.25   <2e-16 ***
## srcVegas:monthApr:todDay      12.3777     0.1071  115.55   <2e-16 ***
## srcChicago:monthMay:todDay    16.5924     0.1050  157.98   <2e-16 ***
## srcHouston:monthMay:todDay    14.9801     0.1060  141.37   <2e-16 ***
## srcLA:monthMay:todDay          8.8806     0.1048   84.70   <2e-16 ***
## srcNYC:monthMay:todDay        11.6595     0.1055  110.50   <2e-16 ***
## srcVegas:monthMay:todDay      12.2198     0.1054  115.96   <2e-16 ***
## srcChicago:monthJun:todDay    14.8766     0.1069  139.11   <2e-16 ***
## srcHouston:monthJun:todDay    11.9026     0.1067  111.52   <2e-16 ***
## srcLA:monthJun:todDay          8.1789     0.1072   76.32   <2e-16 ***
## srcNYC:monthJun:todDay        10.8856     0.1069  101.81   <2e-16 ***
## srcVegas:monthJun:todDay      12.0571     0.1074  112.31   <2e-16 ***
## srcChicago:monthJul:todDay    13.2059     0.1051  125.64   <2e-16 ***
## srcHouston:monthJul:todDay    10.0919     0.1045   96.59   <2e-16 ***
## srcLA:monthJul:todDay          7.6432     0.1056   72.40   <2e-16 ***
## srcNYC:monthJul:todDay         9.6149     0.1044   92.06   <2e-16 ***
## srcVegas:monthJul:todDay      10.4480     0.1056   98.97   <2e-16 ***
## srcChicago:monthAug:todDay    12.5962     0.1046  120.43   <2e-16 ***
## srcHouston:monthAug:todDay     9.9503     0.1053   94.50   <2e-16 ***
## srcLA:monthAug:todDay          7.4806     0.1046   71.51   <2e-16 ***
## srcNYC:monthAug:todDay         9.4848     0.1041   91.10   <2e-16 ***
## srcVegas:monthAug:todDay      10.0057     0.1047   95.57   <2e-16 ***
## srcChicago:monthSep:todDay    15.2389     0.1064  143.25   <2e-16 ***
## srcHouston:monthSep:todDay    11.0266     0.1064  103.59   <2e-16 ***
## srcLA:monthSep:todDay          7.2291     0.1065   67.89   <2e-16 ***
## srcNYC:monthSep:todDay        10.5536     0.1061   99.47   <2e-16 ***
## srcVegas:monthSep:todDay       9.2257     0.1068   86.37   <2e-16 ***
## srcChicago:monthOct:todDay    17.6309     0.1051  167.76   <2e-16 ***
## srcHouston:monthOct:todDay    12.2793     0.1049  117.08   <2e-16 ***
## srcLA:monthOct:todDay          7.3985     0.1051   70.41   <2e-16 ***
## srcNYC:monthOct:todDay        12.3331     0.1051  117.32   <2e-16 ***
## srcVegas:monthOct:todDay       8.4465     0.1051   80.36   <2e-16 ***
## srcChicago:monthNov:todDay    18.4158     0.1068  172.46   <2e-16 ***
## srcHouston:monthNov:todDay    13.5093     0.1067  126.64   <2e-16 ***
## srcLA:monthNov:todDay          7.4528     0.1070   69.68   <2e-16 ***
## srcNYC:monthNov:todDay        12.3604     0.1072  115.35   <2e-16 ***
## srcVegas:monthNov:todDay       7.7986     0.1063   73.33   <2e-16 ***
## srcChicago:monthDec:todDay    17.2362     0.1044  165.16   <2e-16 ***
## srcHouston:monthDec:todDay    14.1162     0.1057  133.61   <2e-16 ***
## srcLA:monthDec:todDay          8.1803     0.1055   77.56   <2e-16 ***
## srcNYC:monthDec:todDay        11.9286     0.1046  113.99   <2e-16 ***
## srcVegas:monthDec:todDay       7.4219     0.1048   70.84   <2e-16 ***
## srcChicago:monthJan:todNight  17.6431     0.1052  167.69   <2e-16 ***
## srcHouston:monthJan:todNight  12.5177     0.1054  118.81   <2e-16 ***
## srcLA:monthJan:todNight        8.2679     0.1062   77.86   <2e-16 ***
## srcNYC:monthJan:todNight      12.0810     0.1052  114.88   <2e-16 ***
## srcVegas:monthJan:todNight     6.7663     0.1062   63.70   <2e-16 ***
## srcChicago:monthFeb:todNight  17.3093     0.1102  157.04   <2e-16 ***
## srcHouston:monthFeb:todNight  13.2973     0.1107  120.09   <2e-16 ***
## srcLA:monthFeb:todNight        6.9601     0.1098   63.39   <2e-16 ***
## srcNYC:monthFeb:todNight      11.9201     0.1107  107.65   <2e-16 ***
## srcVegas:monthFeb:todNight     8.0001     0.1097   72.92   <2e-16 ***
## srcChicago:monthMar:todNight  16.3501     0.1054  155.18   <2e-16 ***
## srcHouston:monthMar:todNight  12.9646     0.1048  123.68   <2e-16 ***
## srcLA:monthMar:todNight        6.0397     0.1047   57.66   <2e-16 ***
## srcNYC:monthMar:todNight      12.5029     0.1057  118.30   <2e-16 ***
## srcVegas:monthMar:todNight     8.9150     0.1056   84.45   <2e-16 ***
## srcChicago:monthApr:todNight  16.5691     0.1069  154.94   <2e-16 ***
## srcHouston:monthApr:todNight  13.2652     0.1073  123.61   <2e-16 ***
## srcLA:monthApr:todNight        5.8407     0.1061   55.07   <2e-16 ***
## srcNYC:monthApr:todNight      11.8948     0.1070  111.17   <2e-16 ***
## srcVegas:monthApr:todNight     9.9145     0.1059   93.61   <2e-16 ***
## srcChicago:monthMay:todNight  14.9166     0.1046  142.62   <2e-16 ***
## srcHouston:monthMay:todNight  12.4708     0.1049  118.93   <2e-16 ***
## srcLA:monthMay:todNight        4.5259     0.1044   43.33   <2e-16 ***
## srcNYC:monthMay:todNight      10.3662     0.1047   99.05   <2e-16 ***
## srcVegas:monthMay:todNight     9.9005     0.1054   93.97   <2e-16 ***
## srcChicago:monthJun:todNight  13.6958     0.1067  128.34   <2e-16 ***
## srcHouston:monthJun:todNight  10.3851     0.1064   97.62   <2e-16 ***
## srcLA:monthJun:todNight        3.9178     0.1072   36.56   <2e-16 ***
## srcNYC:monthJun:todNight       9.6970     0.1073   90.39   <2e-16 ***
## srcVegas:monthJun:todNight     9.7362     0.1066   91.34   <2e-16 ***
## srcChicago:monthJul:todNight  12.0225     0.1048  114.69   <2e-16 ***
## srcHouston:monthJul:todNight   9.6380     0.1054   91.42   <2e-16 ***
## srcLA:monthJul:todNight        3.7239     0.1053   35.36   <2e-16 ***
## srcNYC:monthJul:todNight       8.9811     0.1053   85.25   <2e-16 ***
## srcVegas:monthJul:todNight     8.3553     0.1046   79.87   <2e-16 ***
## srcChicago:monthAug:todNight  11.3610     0.1046  108.62   <2e-16 ***
## srcHouston:monthAug:todNight   9.8231     0.1051   93.50   <2e-16 ***
## srcLA:monthAug:todNight        3.4544     0.1060   32.60   <2e-16 ***
## srcNYC:monthAug:todNight       8.4208     0.1053   79.96   <2e-16 ***
## srcVegas:monthAug:todNight     7.9305     0.1046   75.83   <2e-16 ***
## srcChicago:monthSep:todNight  14.4814     0.1073  134.99   <2e-16 ***
## srcHouston:monthSep:todNight   9.2157     0.1074   85.85   <2e-16 ***
## srcLA:monthSep:todNight        4.0097     0.1066   37.62   <2e-16 ***
## srcNYC:monthSep:todNight       9.6622     0.1069   90.40   <2e-16 ***
## srcVegas:monthSep:todNight     7.7219     0.1060   72.84   <2e-16 ***
## srcChicago:monthOct:todNight  16.6812     0.1050  158.88   <2e-16 ***
## srcHouston:monthOct:todNight  10.8022     0.1050  102.90   <2e-16 ***
## srcLA:monthOct:todNight        5.5429     0.1047   52.92   <2e-16 ***
## srcNYC:monthOct:todNight      11.4524     0.1044  109.69   <2e-16 ***
## srcVegas:monthOct:todNight     7.6152     0.1049   72.62   <2e-16 ***
## srcChicago:monthNov:todNight  17.9773     0.1070  168.02   <2e-16 ***
## srcHouston:monthNov:todNight  11.8271     0.1071  110.45   <2e-16 ***
## srcLA:monthNov:todNight        6.8622     0.1071   64.06   <2e-16 ***
## srcNYC:monthNov:todNight      11.7894     0.1073  109.89   <2e-16 ***
## srcVegas:monthNov:todNight     7.2725     0.1074   67.72   <2e-16 ***
## srcChicago:monthDec:todNight  16.9604     0.1048  161.90   <2e-16 ***
## srcHouston:monthDec:todNight  12.7281     0.1055  120.67   <2e-16 ***
## srcLA:monthDec:todNight        7.9781     0.1060   75.26   <2e-16 ***
## srcNYC:monthDec:todNight      11.6663     0.1050  111.15   <2e-16 ***
## srcVegas:monthDec:todNight     7.1803     0.1044   68.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.874 on 367990 degrees of freedom
## Multiple R-squared:  0.7978, Adjusted R-squared:  0.7977 
## F-statistic: 1.21e+04 on 120 and 367990 DF,  p-value: < 2.2e-16
# Holdout (2022 test) performance of the lm null model:
# meModel = model MSE, meBase = null MSE around the holdout mean,
# r2 = 1 - MSE ratio, rmse = root of the model MSE.
# '^' is the documented exponent operator; '**' is only a parser alias.
allCity %>% 
    filter(tt=="test", year==2022) %>%
    mutate(pred=predict(lmWindNull, newdata=.)) %>%
    summarize(meModel=mean((pred-windspeed_10m)^2), 
              meBase=mean((windspeed_10m-mean(windspeed_10m))^2), 
              r2=1-meModel/meBase, 
              rmse=sqrt(meModel)
              )
## # A tibble: 1 × 4
##   meModel meBase    r2  rmse
##     <dbl>  <dbl> <dbl> <dbl>
## 1    43.0   57.5 0.252  6.56

As expected, the models drive nearly identical results and R-squared

A model is run to predict ground-level wind speed, excluding high-level wind speed and gusts as predictors:

# Random forest for ground-level wind speed using all training variables
# EXCEPT high-level wind speed and wind gusts (names matching ^wind[sg]),
# plus city, month, and day/night; trained pre-2022, tested on 2022
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
          yVar="windspeed_10m", 
          xVars=c(varsTrain[!str_detect(varsTrain, "^wind[sg]")], "src", "month", "tod"), 
          dfTest=allCity %>% filter(tt=="test", year==2022), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=TRUE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )
## Growing trees.. Progress: 8%. Estimated remaining time: 5 minutes, 38 seconds.
## Growing trees.. Progress: 17%. Estimated remaining time: 5 minutes, 3 seconds.
## Growing trees.. Progress: 27%. Estimated remaining time: 4 minutes, 11 seconds.
## Growing trees.. Progress: 39%. Estimated remaining time: 3 minutes, 17 seconds.
## Growing trees.. Progress: 52%. Estimated remaining time: 2 minutes, 21 seconds.
## Growing trees.. Progress: 66%. Estimated remaining time: 1 minute, 38 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 56 seconds.
## Growing trees.. Progress: 93%. Estimated remaining time: 17 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.684% (RMSE 4.11 vs. 7.58 null)
## `geom_smooth()` using formula = 'y ~ x'

The model is reasonably effective (~70% R-squared) at predicting ground-level wind speed, by leveraging multiple predictors in combination

The linear model is re-run for ground wind speed, excluding wind gusts and high-level wind speed:

# Eliminate diffuse radiation due to rank-deficiency
# Linear model for ground-level wind speed on all remaining training
# variables (weathercode treated as categorical); wind gusts and high-level
# wind speed are excluded to match the random forest above
lmWindNoWG <- lm(windspeed_10m ~ ., 
                 data=allCity %>% 
                     filter(tt=="train", year<2022) %>% 
                     mutate(weathercode=factor(weathercode)) %>%
                     select(all_of(varsTrain), src, month, tod) %>% 
                     select(-diffuse_radiation, -windgusts_10m, -windspeed_100m)
                 )
summary(lmWindNoWG)
## 
## Call:
## lm(formula = windspeed_10m ~ ., data = allCity %>% filter(tt == 
##     "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>% 
##     select(all_of(varsTrain), src, month, tod) %>% select(-diffuse_radiation, 
##     -windgusts_10m, -windspeed_100m))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5453  -2.0461  -0.2352   1.8298  25.8615 
## 
## Coefficients:
##                                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                   -8.794e+01  3.444e+00  -25.530  < 2e-16 ***
## hour                          -5.290e-03  8.995e-04   -5.881 4.09e-09 ***
## temperature_2m                 4.215e+00  7.368e-03  572.153  < 2e-16 ***
## relativehumidity_2m            4.803e-02  8.794e-04   54.620  < 2e-16 ***
## dewpoint_2m                    2.985e-01  2.848e-03  104.818  < 2e-16 ***
## apparent_temperature          -3.856e+00  5.570e-03 -692.312  < 2e-16 ***
## pressure_msl                   7.602e-01  8.150e-03   93.274  < 2e-16 ***
## surface_pressure              -8.658e-01  8.277e-03 -104.604  < 2e-16 ***
## precipitation                 -5.503e+00  1.475e+00   -3.731 0.000190 ***
## rain                           5.736e+00  1.476e+00    3.886 0.000102 ***
## snowfall                       1.022e+01  2.137e+00    4.780 1.75e-06 ***
## cloudcover                     1.580e-02  9.370e-04   16.862  < 2e-16 ***
## cloudcover_low                -4.887e-03  4.837e-04  -10.104  < 2e-16 ***
## cloudcover_mid                -1.365e-02  3.621e-04  -37.703  < 2e-16 ***
## cloudcover_high               -1.396e-03  2.287e-04   -6.105 1.03e-09 ***
## shortwave_radiation           -1.250e-02  1.369e-04  -91.338  < 2e-16 ***
## direct_radiation               7.379e-03  1.589e-04   46.428  < 2e-16 ***
## direct_normal_irradiance      -2.042e-03  5.836e-05  -34.997  < 2e-16 ***
## winddirection_10m             -2.326e-04  8.770e-05   -2.653 0.007979 ** 
## winddirection_100m             2.842e-03  8.837e-05   32.159  < 2e-16 ***
## et0_fao_evapotranspiration     2.837e+01  1.527e-01  185.744  < 2e-16 ***
## weathercode1                   1.826e-01  2.295e-02    7.958 1.76e-15 ***
## weathercode2                   1.877e-01  4.127e-02    4.548 5.41e-06 ***
## weathercode3                  -8.981e-02  5.437e-02   -1.652 0.098590 .  
## weathercode51                  1.195e+00  4.870e-02   24.530  < 2e-16 ***
## weathercode53                  1.034e+00  6.451e-02   16.029  < 2e-16 ***
## weathercode55                  1.104e+00  9.142e-02   12.073  < 2e-16 ***
## weathercode61                  1.255e+00  8.802e-02   14.264  < 2e-16 ***
## weathercode63                  1.377e+00  1.449e-01    9.509  < 2e-16 ***
## weathercode65                  8.559e-01  3.970e-01    2.156 0.031092 *  
## weathercode71                  1.541e+00  9.822e-02   15.692  < 2e-16 ***
## weathercode73                  1.471e+00  1.422e-01   10.343  < 2e-16 ***
## weathercode75                  9.751e-01  3.964e-01    2.460 0.013910 *  
## vapor_pressure_deficit        -3.853e+00  1.831e-02 -210.414  < 2e-16 ***
## soil_temperature_0_to_7cm      2.012e-01  2.594e-03   77.579  < 2e-16 ***
## soil_temperature_7_to_28cm     2.356e-01  4.389e-03   53.679  < 2e-16 ***
## soil_temperature_28_to_100cm  -2.907e-01  5.390e-03  -53.940  < 2e-16 ***
## soil_temperature_100_to_255cm -4.260e-02  4.254e-03  -10.014  < 2e-16 ***
## soil_moisture_0_to_7cm         9.197e+00  1.513e-01   60.780  < 2e-16 ***
## soil_moisture_7_to_28cm       -9.172e+00  2.142e-01  -42.823  < 2e-16 ***
## soil_moisture_28_to_100cm      1.344e+00  1.629e-01    8.254  < 2e-16 ***
## soil_moisture_100_to_255cm    -1.393e+00  2.364e-01   -5.891 3.85e-09 ***
## year                           8.494e-02  1.680e-03   50.569  < 2e-16 ***
## doy                            5.569e-03  6.054e-04    9.199  < 2e-16 ***
## srcHouston                     1.927e+01  1.650e-01  116.744  < 2e-16 ***
## srcLA                         -2.018e+01  1.493e-01 -135.132  < 2e-16 ***
## srcNYC                         1.319e+01  1.455e-01   90.657  < 2e-16 ***
## srcVegas                      -5.298e+01  4.716e-01 -112.350  < 2e-16 ***
## monthFeb                      -5.999e-01  3.345e-02  -17.934  < 2e-16 ***
## monthMar                      -1.341e+00  4.800e-02  -27.950  < 2e-16 ***
## monthApr                      -1.825e+00  6.593e-02  -27.686  < 2e-16 ***
## monthMay                      -1.265e+00  8.379e-02  -15.097  < 2e-16 ***
## monthJun                       4.304e-01  1.026e-01    4.195 2.72e-05 ***
## monthJul                       2.428e+00  1.206e-01   20.135  < 2e-16 ***
## monthAug                       2.176e+00  1.375e-01   15.823  < 2e-16 ***
## monthSep                       9.205e-01  1.536e-01    5.991 2.08e-09 ***
## monthOct                      -1.032e+00  1.696e-01   -6.083 1.18e-09 ***
## monthNov                      -1.640e+00  1.866e-01   -8.792  < 2e-16 ***
## monthDec                      -1.907e+00  2.042e-01   -9.341  < 2e-16 ***
## todNight                       9.624e-02  1.877e-02    5.129 2.92e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.229 on 368050 degrees of freedom
## Multiple R-squared:  0.7791, Adjusted R-squared:  0.7791 
## F-statistic: 2.2e+04 on 59 and 368050 DF,  p-value: < 2.2e-16
# Holdout (2022 test) performance of the no-gust lm; weathercode must be
# refactored so predict() sees the same factor coding as training.
# meModel = model MSE, meBase = null MSE, r2, rmse as above.
# '^' is the documented exponent operator; '**' is only a parser alias.
allCity %>% 
    filter(tt=="test", year==2022) %>%
    mutate(weathercode=factor(weathercode)) %>%
    mutate(pred=predict(lmWindNoWG, newdata=.)) %>%
    summarize(meModel=mean((pred-windspeed_10m)^2), 
              meBase=mean((windspeed_10m-mean(windspeed_10m))^2), 
              r2=1-meModel/meBase, 
              rmse=sqrt(meModel)
              )
## # A tibble: 1 × 4
##   meModel meBase    r2  rmse
##     <dbl>  <dbl> <dbl> <dbl>
## 1    12.3   57.5 0.787  3.50
# Rank the no-gust lm coefficients by absolute t-statistic
coefTblNoWG <- summary(lmWindNoWG)[["coefficients"]] %>% 
    as.data.frame() %>% 
    rownames_to_column("Variable") %>% 
    as_tibble()
coefTblNoWG %>% 
    arrange(desc(abs(`t value`)))
## # A tibble: 60 × 5
##    Variable                   Estimate `Std. Error` `t value` `Pr(>|t|)`
##    <chr>                         <dbl>        <dbl>     <dbl>      <dbl>
##  1 apparent_temperature         -3.86       0.00557    -692.           0
##  2 temperature_2m                4.22       0.00737     572.           0
##  3 vapor_pressure_deficit       -3.85       0.0183     -210.           0
##  4 et0_fao_evapotranspiration   28.4        0.153       186.           0
##  5 srcLA                       -20.2        0.149      -135.           0
##  6 srcHouston                   19.3        0.165       117.           0
##  7 srcVegas                    -53.0        0.472      -112.           0
##  8 dewpoint_2m                   0.299      0.00285     105.           0
##  9 surface_pressure             -0.866      0.00828    -105.           0
## 10 pressure_msl                  0.760      0.00815      93.3          0
## # ℹ 50 more rows

Even with many confounders, the linear model performs slightly better than the random forest, driving RMSE down to ~3.5 and R² up to ~80%.

All combinations of two variables are explored for predicting ground wind speed on a smaller training dataset:

# Train and test data created previously (dfTrainCloud and dfTestCloud)
# Variables to explore
possWSVars <- c(varsTrain[!str_detect(varsTrain, "windspeed_10m")], "month", "tod", "src")

# Subsets to use; seq_len() avoids the 1:nrow() footgun and leaves the
# RNG stream identical to sample(1:n, ...)
set.seed(24091816)
idxSmallWS <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)

# All unordered variable pairs; combn() columns enumerate (idx1, idx2)
# in exactly the same order as the original nested loop
pairsWS <- combn(length(possWSVars), 2)

# Preallocate the (idx1, idx2, r2) results matrix instead of growing it
# with rbind() inside the loop, which copies the matrix on every iteration
mtxSmallWS <- matrix(nrow=ncol(pairsWS), ncol=3)

for(pairIdx in seq_len(ncol(pairsWS))) {
    idx1 <- pairsWS[1, pairIdx]
    idx2 <- pairsWS[2, pairIdx]
    # Fit a 2-variable random forest on the small subset; keep only test R2
    r2SmallWS <- runFullRF(dfTrain=dfTrainCloud[idxSmallWS,] %>% mutate(weathercode=factor(weathercode)), 
                           yVar="windspeed_10m", 
                           xVars=possWSVars[c(idx1, idx2)], 
                           dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)), 
                           useLabel=keyLabel, 
                           useSub=stringr::str_to_sentence(keyLabel), 
                           isContVar=TRUE,
                           mtry=2,
                           makePlots=FALSE,
                           returnData=TRUE
                           )[["rfAcc"]][["r2"]]
    mtxSmallWS[pairIdx, ] <- c(idx1, idx2, r2SmallWS)
}
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.134% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.79% (RMSE 8.13 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.646% (RMSE 8.05 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.312% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.339% (RMSE 8.04 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.449% (RMSE 7.14 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.523% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.982% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.631% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.647% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.585% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.835% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.447% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.354% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.263% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.186% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.707% (RMSE 7.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.454% (RMSE 2.34 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.985% (RMSE 8.03 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.158% (RMSE 8.03 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.489% (RMSE 3.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.736% (RMSE 7.44 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.265% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.43% (RMSE 8.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.245% (RMSE 8.04 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.296% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.717% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.809% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.478% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.043% (RMSE 7.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.111% (RMSE 8.07 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.988% (RMSE 7.28 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.53% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.356% (RMSE 8.08 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.229% (RMSE 7.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.666% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.455% (RMSE 6.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.062% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.988% (RMSE 7.81 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.167% (RMSE 5.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.159% (RMSE 7.63 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.273% (RMSE 7.1 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.942% (RMSE 7.7 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.536% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.046% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.195% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.86% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.009% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.358% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.266% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.774% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.13% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.126% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.59% (RMSE 2.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.864% (RMSE 7.73 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.453% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.7% (RMSE 3.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.806% (RMSE 7.52 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.126% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.199% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.569% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.302% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.145% (RMSE 7.63 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.166% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.649% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.365% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.838% (RMSE 8.02 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.551% (RMSE 7.37 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.33% (RMSE 8.29 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.924% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.867% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.09% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.051% (RMSE 7.47 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.659% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.026% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.719% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.065% (RMSE 7.03 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.815% (RMSE 7.52 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.866% (RMSE 7.55 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.563% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.818% (RMSE 7.73 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.658% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.511% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.289% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.49% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.572% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.205% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.626% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.251% (RMSE 2.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.676% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.883% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.236% (RMSE 3.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.651% (RMSE 7.37 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.868% (RMSE 7.47 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.167% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.015% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.512% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.122% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.594% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.449% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.313% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.278% (RMSE 8.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.122% (RMSE 7.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.633% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.482% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.561% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.856% (RMSE 7.65 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.281% (RMSE 6.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.375% (RMSE 7.38 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.862% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.094% (RMSE 7.07 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.385% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.279% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.54% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.496% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.164% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.344% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.03% (RMSE 8.03 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.581% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.945% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.492% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.822% (RMSE 7.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.215% (RMSE 2.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.609% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.046% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.421% (RMSE 3.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.036% (RMSE 7.81 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.435% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.276% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.494% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.246% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.401% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.197% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.835% (RMSE 7.65 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.34% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.645% (RMSE 8.12 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.702% (RMSE 7.09 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.216% (RMSE 8.45 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.426% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.67% (RMSE 8.12 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.962% (RMSE 7.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.362% (RMSE 7.34 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.894% (RMSE 7.44 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.445% (RMSE 6.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.485% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.338% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.195% (RMSE 7.59 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.216% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.982% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.339% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.572% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.571% (RMSE 7.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.093% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.487% (RMSE 7.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.496% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.195% (RMSE 2.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.703% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.648% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.011% (RMSE 3.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.082% (RMSE 7.23 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.23% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.272% (RMSE 7.5 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.707% (RMSE 7.44 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.183% (RMSE 7.7 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.276% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.249% (RMSE 7.63 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.713% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.822% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.302% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.62% (RMSE 7.09 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.685% (RMSE 8.12 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.678% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.903% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.737% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.83% (RMSE 7.36 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.003% (RMSE 6.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.458% (RMSE 7.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.264% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.957% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.007% (RMSE 7.81 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.207% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.208% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.349% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.554% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.555% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.522% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.26% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86% (RMSE 2.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.231% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.123% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.138% (RMSE 4 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.34% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.336% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.526% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.92% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.258% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.977% (RMSE 7.55 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.978% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.232% (RMSE 7.59 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.034% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.16% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.524% (RMSE 7.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.558% (RMSE 8.5 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.451% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.392% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.642% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.063% (RMSE 7.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.55% (RMSE 7.25 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.197% (RMSE 7.27 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.959% (RMSE 7.28 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.704% (RMSE 7.33 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.668% (RMSE 7.37 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.795% (RMSE 7.28 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.574% (RMSE 7.41 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.792% (RMSE 7.2 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.488% (RMSE 7.18 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.145% (RMSE 7.15 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.176% (RMSE 7.23 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.944% (RMSE 2.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.663% (RMSE 7.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.243% (RMSE 6.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.867% (RMSE 3.49 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.815% (RMSE 6.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.912% (RMSE 7.32 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.535% (RMSE 7.17 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.461% (RMSE 7.1 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.613% (RMSE 7.05 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.174% (RMSE 6.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.292% (RMSE 7.06 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.789% (RMSE 7.24 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.952% (RMSE 7.28 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.179% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.866% (RMSE 6.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.185% (RMSE 7.59 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.641% (RMSE 7.09 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.874% (RMSE 7.24 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.275% (RMSE 7.34 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.146% (RMSE 7.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.437% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.558% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.833% (RMSE 7.65 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.23% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.064% (RMSE 7.7 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.248% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.37% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.511% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.914% (RMSE 8.02 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.06% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.524% (RMSE 2.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.695% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.214% (RMSE 7.63 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.996% (RMSE 3.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.293% (RMSE 7.26 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.462% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.525% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.162% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.171% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.307% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.414% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.539% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.546% (RMSE 7.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.523% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.859% (RMSE 7.08 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.248% (RMSE 7.59 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.687% (RMSE 7.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.488% (RMSE 7.53 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.722% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.534% (RMSE 6.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.492% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.807% (RMSE 7.65 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.815% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.922% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.213% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.632% (RMSE 8.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.608% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.346% (RMSE 8.07 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.383% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.545% (RMSE 2.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.132% (RMSE 7.63 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.013% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.105% (RMSE 3.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.421% (RMSE 7.34 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.574% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.164% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.536% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.946% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.92% (RMSE 7.55 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.629% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.469% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.558% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.428% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.926% (RMSE 7.08 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.031% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.309% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.641% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.512% (RMSE 7.64 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.193% (RMSE 6.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.312% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.711% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.337% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.203% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.318% (RMSE 8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.91% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.722% (RMSE 8.09 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.42% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.525% (RMSE 2.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.072% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.022% (RMSE 7.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.043% (RMSE 3.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.386% (RMSE 7.3 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.284% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.199% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.425% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.482% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.896% (RMSE 7.51 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.11% (RMSE 7.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.411% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.123% (RMSE 7.7 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.336% (RMSE 8.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.938% (RMSE 7.08 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.975% (RMSE 7.55 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.662% (RMSE 7.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.202% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.534% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.769% (RMSE 6.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.802% (RMSE 7.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.151% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.934% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.956% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.242% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.168% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.819% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.669% (RMSE 2.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.707% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.919% (RMSE 7.81 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.389% (RMSE 3.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.68% (RMSE 7.44 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.805% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.231% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.022% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.851% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.231% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.56% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.56% (RMSE 8.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.277% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.619% (RMSE 8.05 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.545% (RMSE 7.25 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.259% (RMSE 8.07 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.163% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.84% (RMSE 7.73 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.011% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.236% (RMSE 6.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.421% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.585% (RMSE 7.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.182% (RMSE 8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.723% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.614% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.841% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.024% (RMSE 2.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.469% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.512% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.156% (RMSE 3.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.485% (RMSE 7.33 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.139% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.417% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.021% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.178% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.34% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.041% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.256% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.7% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.222% (RMSE 8.21 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.407% (RMSE 7.1 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.619% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.683% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.736% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.939% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.788% (RMSE 6.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.85% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.557% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.576% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.039% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.514% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.935% (RMSE 2.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.693% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.648% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.399% (RMSE 3.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.529% (RMSE 7.41 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.612% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.7% (RMSE 7.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.996% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.294% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.169% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.99% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.374% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.776% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.921% (RMSE 8.17 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.925% (RMSE 7.24 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.415% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.217% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.076% (RMSE 7.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.337% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.323% (RMSE 7.02 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.129% (RMSE 8.07 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.883% (RMSE 7.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.586% (RMSE 8.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.923% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.871% (RMSE 2.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.765% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.942% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.576% (RMSE 3.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.658% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.896% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.656% (RMSE 8.05 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.1% (RMSE 8.07 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.491% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.039% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.457% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.742% (RMSE 8.05 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.608% (RMSE 8.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.028% (RMSE 8.27 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.53% (RMSE 7.41 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.048% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.719% (RMSE 8.02 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.798% (RMSE 7.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.491% (RMSE 7.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.538% (RMSE 7.05 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.652% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.266% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.38% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.728% (RMSE 2.18 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.304% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.106% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.983% (RMSE 3.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.823% (RMSE 7.32 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.631% (RMSE 7.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.464% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.946% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.103% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.232% (RMSE 7.63 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.413% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.467% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.564% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.126% (RMSE 8.14 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.802% (RMSE 7.16 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.356% (RMSE 8.15 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.121% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.234% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.775% (RMSE 8.09 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.275% (RMSE 7.34 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.564% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.979% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.189% (RMSE 2.25 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.71% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.45% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.875% (RMSE 3.73 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.176% (RMSE 7.27 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.005% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.257% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.457% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.12% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.592% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.736% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.717% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.663% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.802% (RMSE 8.23 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.375% (RMSE 7.14 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.997% (RMSE 8.13 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.968% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.742% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.349% (RMSE 8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.16% (RMSE 7.23 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.481% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.638% (RMSE 2.32 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.392% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.081% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.715% (RMSE 3.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.312% (RMSE 7.3 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.785% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.423% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.425% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.871% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.982% (RMSE 7.62 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.692% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.969% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.131% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.954% (RMSE 8.2 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.043% (RMSE 7.19 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.515% (RMSE 8.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.818% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.317% (RMSE 7.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.789% (RMSE 8.13 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.358% (RMSE 7.26 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.532% (RMSE 2.21 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.412% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.24% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.439% (RMSE 3.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.713% (RMSE 7.48 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.615% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.7% (RMSE 7.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.617% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.706% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.096% (RMSE 7.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.498% (RMSE 7.64 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.017% (RMSE 7.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.114% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.292% (RMSE 8.14 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.717% (RMSE 7.21 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.809% (RMSE 8.06 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.972% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.643% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.181% (RMSE 7.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.636% (RMSE 7.33 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.028% (RMSE 2.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.79% (RMSE 2.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.197% (RMSE 2.25 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.114% (RMSE 2.26 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.829% (RMSE 2.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.477% (RMSE 2.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.696% (RMSE 2.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.354% (RMSE 2.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.14% (RMSE 2.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.953% (RMSE 2.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.041% (RMSE 2.73 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.131% (RMSE 2.72 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.744% (RMSE 2.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.508% (RMSE 2.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.067% (RMSE 3.03 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.04% (RMSE 2.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.717% (RMSE 2.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.527% (RMSE 2.45 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.309% (RMSE 2.7 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.525% (RMSE 6.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.15% (RMSE 4 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.233% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.878% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.421% (RMSE 7.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.574% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.467% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.622% (RMSE 7.65 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.652% (RMSE 7.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.838% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.056% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.398% (RMSE 7.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.366% (RMSE 7.02 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.411% (RMSE 8.29 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.747% (RMSE 7.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.342% (RMSE 8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.965% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.253% (RMSE 7.15 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.788% (RMSE 4.03 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.453% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.712% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.696% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.125% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.009% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.343% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.515% (RMSE 7.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.22% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.711% (RMSE 7.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.752% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.896% (RMSE 6.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.58% (RMSE 8.26 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.09% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.534% (RMSE 7.94 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.351% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.732% (RMSE 7.17 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.654% (RMSE 3.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.237% (RMSE 3.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.609% (RMSE 3.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.743% (RMSE 3.89 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.853% (RMSE 3.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.493% (RMSE 3.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.556% (RMSE 3.9 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.316% (RMSE 3.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.108% (RMSE 3.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.348% (RMSE 4.13 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.33% (RMSE 3.28 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.138% (RMSE 3.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.198% (RMSE 4.07 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.075% (RMSE 4.08 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.029% (RMSE 3.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.328% (RMSE 3.19 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.698% (RMSE 7.25 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.385% (RMSE 7.06 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.332% (RMSE 7.57 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.101% (RMSE 7.43 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.079% (RMSE 7.27 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.153% (RMSE 7.43 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.003% (RMSE 7.31 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.113% (RMSE 7.54 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.281% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.473% (RMSE 6.93 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.123% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.278% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.591% (RMSE 7.45 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.691% (RMSE 7.33 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.824% (RMSE 6.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.985% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.859% (RMSE 7.99 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.823% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.709% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.347% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.962% (RMSE 7.92 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.369% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.399% (RMSE 8.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.109% (RMSE 7.19 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.636% (RMSE 7.52 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.364% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.612% (RMSE 7.45 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.829% (RMSE 7.51 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.218% (RMSE 6.82 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.563% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.94% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.29% (RMSE 7.63 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.962% (RMSE 7.77 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.785% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.933% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.399% (RMSE 8.18 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.759% (RMSE 7.24 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.117% (RMSE 8.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.62% (RMSE 7.87 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.756% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.078% (RMSE 7.96 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.245% (RMSE 7.3 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.586% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.954% (RMSE 7.73 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.369% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.48% (RMSE 7.68 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.631% (RMSE 7.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.93% (RMSE 8.02 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.284% (RMSE 7.26 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.783% (RMSE 8.61 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.183% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.074% (RMSE 8.1 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.541% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.451% (RMSE 7.41 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.607% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.331% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.15% (RMSE 7.78 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.104% (RMSE 7.74 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.998% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.921% (RMSE 7.16 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.357% (RMSE 8.49 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.405% (RMSE 7.71 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.851% (RMSE 7.91 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.039% (RMSE 7.85 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.788% (RMSE 7.32 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.623% (RMSE 7.76 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.61% (RMSE 7.65 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.862% (RMSE 7.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.175% (RMSE 7.7 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.496% (RMSE 6.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.964% (RMSE 8.17 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.3% (RMSE 7.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.51% (RMSE 7.79 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.53% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.479% (RMSE 7.18 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.198% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.737% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.53% (RMSE 7.49 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.224% (RMSE 7.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.411% (RMSE 8.11 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.853% (RMSE 7.51 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.694% (RMSE 7.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.045% (RMSE 7.66 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.958% (RMSE 7.16 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.572% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.792% (RMSE 7.95 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.327% (RMSE 7.18 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -27.991% (RMSE 8.58 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.51% (RMSE 7.83 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.796% (RMSE 8.02 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.836% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.435% (RMSE 7.49 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.985% (RMSE 8.1 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.485% (RMSE 7.22 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -27.416% (RMSE 8.56 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.889% (RMSE 7.84 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.292% (RMSE 7.86 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.647% (RMSE 7.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.578% (RMSE 7.33 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.622% (RMSE 7.21 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -72.845% (RMSE 9.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.865% (RMSE 7.88 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.907% (RMSE 8.09 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.615% (RMSE 8.19 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.82% (RMSE 7.52 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.244% (RMSE 7.67 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.383% (RMSE 6.98 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.573% (RMSE 7.01 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.585% (RMSE 7.17 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.338% (RMSE 7.18 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.362% (RMSE 8.53 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.48% (RMSE 7.6 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.229% (RMSE 7.59 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.716% (RMSE 6.75 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.804% (RMSE 7.8 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.453% (RMSE 7.97 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.133% (RMSE 7.43 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.636% (RMSE 7.52 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.29% (RMSE 6.69 vs. 7.58 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.928% (RMSE 6.87 vs. 7.58 null)

R-squared for each pair of predictor variables is explored:

# Assemble the pairwise holdout-R-squared matrix into a tidy tibble, attach
# the predictor names for each index pair, then list the strongest pairs first.
dfSmallR2WS <- mtxSmallWS %>% 
    as.data.frame() %>% 
    purrr::set_names(c("idx1", "idx2", "r2")) %>% 
    tibble::as_tibble() %>% 
    mutate(
        var1=possWSVars[idx1], 
        var2=possWSVars[idx2], 
        rn=row_number()
    )
dfSmallR2WS %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 703 × 3
##    var1                     var2                          r2
##    <chr>                    <chr>                      <dbl>
##  1 shortwave_radiation      windspeed_100m             0.917
##  2 diffuse_radiation        windspeed_100m             0.915
##  3 windspeed_100m           windgusts_10m              0.912
##  4 direct_radiation         windspeed_100m             0.912
##  5 windspeed_100m           et0_fao_evapotranspiration 0.911
##  6 direct_normal_irradiance windspeed_100m             0.906
##  7 hour                     windspeed_100m             0.905
##  8 windspeed_100m           tod                        0.895
##  9 windspeed_100m           soil_temperature_0_to_7cm  0.877
## 10 temperature_2m           windspeed_100m             0.876
## 11 windspeed_100m           soil_moisture_100_to_255cm 0.875
## 12 windspeed_100m           vapor_pressure_deficit     0.875
## 13 windspeed_100m           src                        0.873
## 14 relativehumidity_2m      windspeed_100m             0.873
## 15 apparent_temperature     windspeed_100m             0.872
## 16 windspeed_100m           soil_moisture_7_to_28cm    0.871
## 17 windspeed_100m           soil_moisture_0_to_7cm     0.870
## 18 surface_pressure         windspeed_100m             0.869
## 19 windspeed_100m           soil_moisture_28_to_100cm  0.867
## 20 rain                     windspeed_100m             0.865
## # ℹ 683 more rows
# For each variable, summarize the min/mean/max holdout R-squared across all
# 2-predictor models that include it, and plot that range per variable
# (variables ordered by mean R-squared; red dashed line marks a perfect fit).
dfSmallR2WS %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(r2_min=min(r2), r2_mu=mean(r2), r2_max=max(r2)) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    geom_hline(yintercept=1, lty=2, color="red") +
    lims(y=c(NA, 1)) + 
    coord_flip() + 
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting ground-level wind speed", 
         y="Range of R2 (min-mean-max)", 
         x=NULL
    )

# Drop every pair that involves a wind variable, then rank the remaining
# predictor pairs by holdout R-squared.
dfSmallR2WS %>% 
    filter(!str_detect(var1, "wind")) %>% 
    filter(!str_detect(var2, "wind")) %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 561 × 3
##    var1                         var2                            r2
##    <chr>                        <chr>                        <dbl>
##  1 temperature_2m               apparent_temperature         0.392
##  2 et0_fao_evapotranspiration   src                          0.248
##  3 month                        src                          0.223
##  4 surface_pressure             et0_fao_evapotranspiration   0.208
##  5 year                         src                          0.207
##  6 weathercode                  src                          0.192
##  7 hour                         src                          0.185
##  8 relativehumidity_2m          src                          0.183
##  9 tod                          src                          0.179
## 10 snowfall                     src                          0.178
## 11 precipitation                src                          0.175
## 12 rain                         src                          0.172
## 13 cloudcover_low               src                          0.168
## 14 et0_fao_evapotranspiration   soil_moisture_100_to_255cm   0.165
## 15 surface_pressure             soil_moisture_100_to_255cm   0.159
## 16 soil_temperature_28_to_100cm soil_moisture_100_to_255cm   0.155
## 17 apparent_temperature         surface_pressure             0.154
## 18 soil_moisture_100_to_255cm   doy                          0.154
## 19 cloudcover                   src                          0.152
## 20 surface_pressure             soil_temperature_28_to_100cm 0.152
## # ℹ 541 more rows
# dfSmallR2WC %>% 
#     filter(var2!="precipitation", 
#            var1!="precipitation", 
#            !str_detect(var2, "cloudcover"), 
#            !str_detect(var1, "cloudcover")
#            ) %>% 
#     pivot_longer(cols=c(var1, var2)) %>% 
#     group_by(value) %>% 
#     summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>% 
#     ggplot(aes(x=fct_reorder(value, r2_mu))) + 
#     coord_flip() + 
#     geom_point(aes(y=r2_mu)) + 
#     geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
#     lims(y=c(NA, 1)) + 
#     geom_hline(yintercept=1, lty=2, color="red") +
#     labs(title="Accuracy in every 2-predictor model including self and one other", 
#          subtitle="Predicting weathercode (excluding variable paired with 'precipitation' or 'cloudcover')", 
#          y="Range of accuracy (min-mean-max)", 
#          x=NULL
#     )
# 
# dfSmallR2WC %>% 
#     arrange(desc(r2)) %>% 
#     filter(!str_detect(var2, "rain|snow|precip"), 
#            !str_detect(var1, "rain|snow|precip"),
#            !str_detect(var2, "cloudcover"), 
#            !str_detect(var1, "cloudcover")
#            ) %>% 
#     select(var1, var2, r2) %>% 
#     print(n=20)

# Null accuracy would pick the most frequent observation
# allCity %>% count(weathercode, sort=TRUE) %>% mutate(pct=n/sum(n))

The best predictor is high-level wind speed, and the next-best predictor is ground-level wind gusts. Every other pair of predictors has an R-squared on unseen data of at most ~40% (and sometimes even negative).

Select combinations are explored using the full training dataset:

# Candidate predictors for the larger pairwise models of ground wind speed
possLargeWS <- c(
    "windspeed_100m", 
    "windgusts_10m", 
    "temperature_2m", 
    "apparent_temperature"
)
possLargeWS
## [1] "windspeed_100m"       "windgusts_10m"        "temperature_2m"      
## [4] "apparent_temperature"
# Accumulator for one (idx1, idx2, r2) row per predictor pair
mtxLargeWS <- matrix(nrow=0, ncol=3)

# Fit a random forest on every pair of candidate predictors and record the
# holdout R-squared (pre-2022 training vs. 2022 test) for each pair.
# seq_len() is used instead of 1:(length(x)-1), which would misbehave if the
# candidate vector ever had fewer than two elements.
# NOTE(review): weathercode is converted to a factor in both frames even
# though the response here is windspeed_10m; this looks carried over from the
# weathercode models — confirm it is needed (harmless if weathercode is not
# among the selected predictors).
for(idx1 in seq_len(length(possLargeWS) - 1L)) {
    for(idx2 in seq(idx1 + 1L, length(possLargeWS))) {
        # Holdout R-squared for the 2-predictor model on this pair
        r2LargeWS <- runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)), 
                               yVar="windspeed_10m", 
                               xVars=possLargeWS[c(idx1, idx2)], 
                               dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                               useLabel=keyLabel, 
                               useSub=stringr::str_to_sentence(keyLabel), 
                               isContVar=TRUE,
                               mtry=2,
                               makePlots=FALSE,
                               returnData=TRUE
                               )[["rfAcc"]][["r2"]]
        # Append one (idx1, idx2, r2) row; only choose(n, 2) rows total, so
        # growing by rbind is acceptable relative to the RF fit cost
        mtxLargeWS <- rbind(mtxLargeWS, c(idx1, idx2, r2LargeWS))
    }
}
## Growing trees.. Progress: 42%. Estimated remaining time: 42 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.903% (RMSE 2.16 vs. 7.58 null)
## Growing trees.. Progress: 33%. Estimated remaining time: 1 minute, 2 seconds.
## Growing trees.. Progress: 66%. Estimated remaining time: 31 seconds.
## Growing trees.. Progress: 95%. Estimated remaining time: 4 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.473% (RMSE 2.68 vs. 7.58 null)
## Growing trees.. Progress: 28%. Estimated remaining time: 1 minute, 18 seconds.
## Growing trees.. Progress: 62%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 96%. Estimated remaining time: 3 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.019% (RMSE 2.73 vs. 7.58 null)
## Growing trees.. Progress: 36%. Estimated remaining time: 54 seconds.
## Growing trees.. Progress: 74%. Estimated remaining time: 22 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.508% (RMSE 3.83 vs. 7.58 null)
## Growing trees.. Progress: 33%. Estimated remaining time: 1 minute, 2 seconds.
## Growing trees.. Progress: 70%. Estimated remaining time: 26 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.793% (RMSE 3.81 vs. 7.58 null)
## Growing trees.. Progress: 40%. Estimated remaining time: 46 seconds.
## Growing trees.. Progress: 76%. Estimated remaining time: 19 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.671% (RMSE 5.54 vs. 7.58 null)
# Convert the pairwise-R2 matrix into a labeled tibble (variable names looked
# up from possLargeWS by index) and show the pairs ranked by holdout R-squared
dfLargeR2WS <- mtxLargeWS %>%
    as.data.frame() %>%
    purrr::set_names(c("idx1", "idx2", "r2")) %>%
    tibble::as_tibble() %>%
    mutate(var1=possLargeWS[idx1], var2=possLargeWS[idx2], rn=row_number())
dfLargeR2WS %>%
    arrange(desc(r2)) %>%
    select(var1, var2, r2) %>%
    print(n=20)
## # A tibble: 6 × 3
##   var1           var2                    r2
##   <chr>          <chr>                <dbl>
## 1 windspeed_100m windgusts_10m        0.919
## 2 windspeed_100m temperature_2m       0.875
## 3 windspeed_100m apparent_temperature 0.870
## 4 windgusts_10m  apparent_temperature 0.748
## 5 windgusts_10m  temperature_2m       0.745
## 6 temperature_2m apparent_temperature 0.467

A model is run to predict evapotranspiration, at first allowing all predictors:

# Label reused in runFullRF's printed accuracy messages and plot subtitles
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest for evapotranspiration with all predictors allowed except any
# evapotranspiration-derived column; src/month/tod added as extra features.
# Trains on the pre-2022 training split and scores the 2022 holdout split.
rfET0Full <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                       yVar="et0_fao_evapotranspiration", 
                       xVars=c(varsTrain[!str_detect(varsTrain, "evapo")], "src", "month", "tod"), 
                       dfTest=allCity %>% filter(tt=="test", year==2022), 
                       useLabel=keyLabel, 
                       useSub=stringr::str_to_sentence(keyLabel), 
                       isContVar=TRUE,
                       rndTo=-1L,
                       refXY=TRUE,
                       returnData=TRUE
                       )
## Growing trees.. Progress: 13%. Estimated remaining time: 3 minutes, 31 seconds.
## Growing trees.. Progress: 25%. Estimated remaining time: 3 minutes, 7 seconds.
## Growing trees.. Progress: 38%. Estimated remaining time: 2 minutes, 35 seconds.
## Growing trees.. Progress: 52%. Estimated remaining time: 1 minute, 57 seconds.
## Growing trees.. Progress: 64%. Estimated remaining time: 1 minute, 28 seconds.
## Growing trees.. Progress: 77%. Estimated remaining time: 56 seconds.
## Growing trees.. Progress: 89%. Estimated remaining time: 26 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.815% (RMSE 0.01 vs. 0.2 null)
## `geom_smooth()` using formula = 'y ~ x'

The model is highly effective at predicting evapotranspiration, primarily by using radiation variables.

The linear model is run for evapotranspiration, using all predictors:

# Eliminate diffuse radiation due to rank-deficiency
# Linear benchmark: OLS for evapotranspiration on all training variables
# (weathercode treated as a factor), fit on the pre-2022 training split
lmET0Full <- lm(et0_fao_evapotranspiration ~ ., 
                data=allCity %>% 
                    filter(tt=="train", year<2022) %>% 
                    mutate(weathercode=factor(weathercode)) %>%
                    select(all_of(varsTrain)) %>% 
                    select(-diffuse_radiation)
                )
summary(lmET0Full)
## 
## Call:
## lm(formula = et0_fao_evapotranspiration ~ ., data = allCity %>% 
##     filter(tt == "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>% 
##     select(all_of(varsTrain)) %>% select(-diffuse_radiation))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32559 -0.01493  0.00419  0.01915  0.26105 
## 
## Coefficients:
##                                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                   -7.562e-01  3.663e-02  -20.643  < 2e-16 ***
## hour                          -2.169e-04  9.607e-06  -22.576  < 2e-16 ***
## temperature_2m                -3.631e-03  9.148e-05  -39.685  < 2e-16 ***
## relativehumidity_2m           -5.373e-04  9.132e-06  -58.839  < 2e-16 ***
## dewpoint_2m                    1.469e-03  3.041e-05   48.292  < 2e-16 ***
## apparent_temperature           2.917e-03  7.690e-05   37.929  < 2e-16 ***
## pressure_msl                   1.042e-04  1.365e-05    7.633 2.30e-14 ***
## surface_pressure               6.140e-05  6.081e-06   10.096  < 2e-16 ***
## precipitation                  2.603e-02  1.574e-02    1.654 0.098223 .  
## rain                          -2.746e-02  1.575e-02   -1.743 0.081305 .  
## snowfall                      -4.760e-02  2.281e-02   -2.087 0.036930 *  
## cloudcover                    -1.115e-04  1.000e-05  -11.145  < 2e-16 ***
## cloudcover_low                -2.894e-05  5.162e-06   -5.606 2.08e-08 ***
## cloudcover_mid                 2.012e-05  3.856e-06    5.218 1.81e-07 ***
## cloudcover_high                2.493e-06  2.440e-06    1.022 0.306957    
## shortwave_radiation            4.744e-04  1.189e-06  398.899  < 2e-16 ***
## direct_radiation               2.226e-04  1.476e-06  150.816  < 2e-16 ***
## direct_normal_irradiance      -1.413e-04  5.139e-07 -274.899  < 2e-16 ***
## windspeed_10m                  1.943e-03  3.661e-05   53.065  < 2e-16 ***
## windspeed_100m                -7.108e-05  2.309e-05   -3.078 0.002081 ** 
## winddirection_10m              4.929e-06  9.329e-07    5.284 1.27e-07 ***
## winddirection_100m             6.691e-06  9.407e-07    7.112 1.14e-12 ***
## windgusts_10m                  7.022e-04  1.238e-05   56.717  < 2e-16 ***
## weathercode1                   2.305e-03  2.451e-04    9.405  < 2e-16 ***
## weathercode2                   5.252e-03  4.405e-04   11.922  < 2e-16 ***
## weathercode3                   7.297e-03  5.801e-04   12.578  < 2e-16 ***
## weathercode51                  6.326e-03  5.209e-04   12.143  < 2e-16 ***
## weathercode53                  3.490e-03  6.900e-04    5.058 4.23e-07 ***
## weathercode55                  3.565e-03  9.773e-04    3.648 0.000264 ***
## weathercode61                  2.875e-03  9.411e-04    3.055 0.002253 ** 
## weathercode63                  1.954e-03  1.548e-03    1.262 0.206807    
## weathercode65                  3.918e-03  4.239e-03    0.924 0.355287    
## weathercode71                  3.671e-03  1.048e-03    3.503 0.000460 ***
## weathercode73                  7.998e-03  1.518e-03    5.267 1.39e-07 ***
## weathercode75                  9.929e-03  4.232e-03    2.346 0.018984 *  
## vapor_pressure_deficit         4.726e-02  1.615e-04  292.563  < 2e-16 ***
## soil_temperature_0_to_7cm      1.573e-03  2.715e-05   57.947  < 2e-16 ***
## soil_temperature_7_to_28cm    -2.777e-03  4.571e-05  -60.763  < 2e-16 ***
## soil_temperature_28_to_100cm   6.820e-04  5.074e-05   13.439  < 2e-16 ***
## soil_temperature_100_to_255cm  9.320e-04  2.611e-05   35.693  < 2e-16 ***
## soil_moisture_0_to_7cm        -3.997e-02  1.572e-03  -25.424  < 2e-16 ***
## soil_moisture_7_to_28cm        2.651e-02  2.238e-03   11.846  < 2e-16 ***
## soil_moisture_28_to_100cm     -5.818e-02  1.683e-03  -34.574  < 2e-16 ***
## soil_moisture_100_to_255cm     4.955e-02  1.655e-03   29.937  < 2e-16 ***
## year                           3.002e-04  1.766e-05   16.994  < 2e-16 ***
## doy                           -1.664e-05  7.047e-07  -23.616  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03447 on 368064 degrees of freedom
## Multiple R-squared:  0.9689, Adjusted R-squared:  0.9688 
## F-statistic: 2.544e+05 on 45 and 368064 DF,  p-value: < 2.2e-16
# Score the linear model on the 2022 holdout: model MSE, mean-only baseline
# MSE, derived R-squared, and RMSE. The weathercode factor conversion must
# happen in its own mutate() so that `newdata=.` below sees the factored column.
allCity %>%
    filter(tt=="test", year==2022) %>%
    mutate(weathercode=factor(weathercode)) %>%
    mutate(pred=predict(lmET0Full, newdata=.)) %>%
    summarize(meModel=mean((pred - et0_fao_evapotranspiration)^2),
              meBase=mean((et0_fao_evapotranspiration - mean(et0_fao_evapotranspiration))^2),
              r2=1 - meModel/meBase,
              rmse=sqrt(meModel))
## # A tibble: 1 × 4
##   meModel meBase    r2   rmse
##     <dbl>  <dbl> <dbl>  <dbl>
## 1 0.00144 0.0392 0.963 0.0380
# Coefficient table sorted by |t|: the most statistically influential terms
# of the linear model appear first
summary(lmET0Full)$coefficients %>% 
    as.data.frame() %>% 
    rownames_to_column("Variable") %>% 
    tibble::as_tibble() %>% 
    arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
##    Variable                    Estimate `Std. Error` `t value` `Pr(>|t|)`
##    <chr>                          <dbl>        <dbl>     <dbl>      <dbl>
##  1 shortwave_radiation         0.000474  0.00000119      399.           0
##  2 vapor_pressure_deficit      0.0473    0.000162        293.           0
##  3 direct_normal_irradiance   -0.000141  0.000000514    -275.           0
##  4 direct_radiation            0.000223  0.00000148      151.           0
##  5 soil_temperature_7_to_28cm -0.00278   0.0000457       -60.8          0
##  6 relativehumidity_2m        -0.000537  0.00000913      -58.8          0
##  7 soil_temperature_0_to_7cm   0.00157   0.0000272        57.9          0
##  8 windgusts_10m               0.000702  0.0000124        56.7          0
##  9 windspeed_10m               0.00194   0.0000366        53.1          0
## 10 dewpoint_2m                 0.00147   0.0000304        48.3          0
## # ℹ 36 more rows

Even with many confounders, the linear model largely identifies that radiation is a strong predictor for evapotranspiration. The random forest drives a higher R-squared and an improved RMSE (0.01 RF vs. 0.04 linear).

Correlations between predictors and evapotranspiration are assessed:

# Correlation of each training variable with evapotranspiration, plotted as a
# horizontal bar chart; bars with |cor| > 0.2 are labeled with their value.
# vapply() replaces sapply() so the result is guaranteed to be a named numeric
# vector regardless of input (sapply's simplification is input-dependent).
vapply(varsTrain, 
       FUN=function(x) cor(allCity$et0_fao_evapotranspiration, allCity[[x]]), 
       FUN.VALUE=numeric(1)
       ) %>% 
    as.data.frame() %>% 
    rownames_to_column("var") %>% 
    tibble::as_tibble() %>% 
    purrr::set_names(c("var", "cor")) %>% 
    ggplot(aes(x=fct_reorder(var, cor), y=cor)) + 
    geom_col(fill="lightblue") + 
    geom_text(data=~filter(., abs(cor)>0.2), aes(y=cor/2, label=round(cor, 2)), size=2.5) +
    coord_flip() + 
    labs(title="Correlation with evapotranspiration", 
         y="Correlation", 
         x=NULL
         ) + 
    lims(y=c(NA, 1))

# Snap evapotranspiration and four strong predictors onto coarse grids
# (1/20, 1/4, whole degrees, and 25-unit bins respectively), count the
# resulting combinations, and plot a weighted scatter + linear fit per panel
allCity %>%
    select(et0_fao_evapotranspiration, 
           shortwave_radiation, 
           direct_radiation, 
           vapor_pressure_deficit, 
           soil_temperature_0_to_7cm
           ) %>%
    mutate(across(.cols=c(et0_fao_evapotranspiration), .fns=\(x) round(x*20)/20),
           across(.cols=c(vapor_pressure_deficit), .fns=\(x) round(x*4)/4),
           across(.cols=c(soil_temperature_0_to_7cm), .fns=\(x) round(x, 0)),
           across(.cols=c(shortwave_radiation, direct_radiation), .fns=\(x) round(x/25)*25),
           rn=row_number()
           ) %>%
    pivot_longer(cols=-c(rn, et0_fao_evapotranspiration)) %>%
    count(et0_fao_evapotranspiration, name, value) %>%
    ggplot(aes(x=value, y=et0_fao_evapotranspiration)) + 
    geom_point(aes(size=n), alpha=0.5) + 
    geom_smooth(aes(weight=n), method="lm") +
    facet_wrap(~name, scales="free_x") + 
    labs(x=NULL, title="Evapotranspiration vs. four potentially strong predictors")
## `geom_smooth()` using formula = 'y ~ x'

A correlation heatmap is produced, borrowing from the recipe provided by STHDA:

# Function copied from STHDA, with an explicit return value
reorder_cormat <- function(cormat){
    # Reorder a correlation matrix so highly correlated variables sit next to
    # each other, via hierarchical clustering.
    #
    # cormat: square correlation matrix (values in [-1, 1]) with dimnames
    # Returns: cormat with rows and columns permuted into clustering order
    
    # Map correlation to a distance in [0, 1]: cor 1 -> 0, cor -1 -> 1
    dd <- as.dist((1-cormat)/2)
    hc <- hclust(dd)
    # Return the permuted matrix explicitly (the original relied on the
    # invisible value of a trailing assignment as the function result)
    cormat[hc$order, hc$order]
}

# Build the correlation matrix, cluster-order it, blank the redundant upper
# triangle, and melt to a tidy (Var1, Var2, value) tibble for plotting
corAll <- allCity[,varsTrain] %>%
    cor() %>%
    reorder_cormat()
corAll[upper.tri(corAll)] <- NA
corAll <- reshape2::melt(corAll, na.rm=TRUE) %>%
    tibble::as_tibble()
corAll
## # A tibble: 666 × 3
##    Var1                          Var2                            value
##    <fct>                         <fct>                           <dbl>
##  1 soil_temperature_100_to_255cm soil_temperature_100_to_255cm  1     
##  2 doy                           soil_temperature_100_to_255cm  0.522 
##  3 dewpoint_2m                   soil_temperature_100_to_255cm  0.285 
##  4 soil_temperature_7_to_28cm    soil_temperature_100_to_255cm  0.640 
##  5 soil_temperature_28_to_100cm  soil_temperature_100_to_255cm  0.825 
##  6 soil_temperature_0_to_7cm     soil_temperature_100_to_255cm  0.516 
##  7 temperature_2m                soil_temperature_100_to_255cm  0.538 
##  8 apparent_temperature          soil_temperature_100_to_255cm  0.535 
##  9 winddirection_10m             soil_temperature_100_to_255cm -0.0725
## 10 winddirection_100m            soil_temperature_100_to_255cm -0.103 
## # ℹ 656 more rows
# Heatmap of the lower-triangle correlations: blue = -1, white = 0, red = +1
ggplot(corAll, aes(x=Var2, y=Var1)) +
    geom_tile(aes(fill=value)) +
    scale_fill_gradient2(NULL, low="blue", high="red", mid="white", midpoint=0, limit=c(-1, 1)) +
    labs(x=NULL, y=NULL, title="Pearson correlation of weather variables") +
    theme(axis.text.x=element_text(angle=90, vjust=1, hjust=1))

The process is converted to functional form:

makeHeatMap <- function(df, 
                        vecSelect=NULL, 
                        groupSimilar=TRUE, 
                        upperTriOnly=TRUE, 
                        plotMap=TRUE, 
                        returnData=FALSE
                        ) {
    
    # Build (and optionally plot) a correlation heatmap for selected columns.
    #
    # df: the data frame or tibble
    # vecSelect: character vector of columns to keep; NULL keeps all columns
    # groupSimilar: cluster-order the matrix so correlated variables are adjacent?
    # upperTriOnly: blank the redundant upper triangle?
    # plotMap: print the ggplot heatmap?
    # returnData: return the tidy correlation tibble?
    
    # Correlation matrix of the selected columns (colSelector is a project helper)
    matCor <- cor(colSelector(df, vecSelect=vecSelect))
    
    # Cluster-order the variables when requested
    if(isTRUE(groupSimilar)) matCor <- reorder_cormat(matCor)
    
    # Blank the upper triangle when requested
    if(isTRUE(upperTriOnly)) matCor[upper.tri(matCor)] <- NA
    
    # Tidy (Var1, Var2, value) form for ggplot
    dfCor <- reshape2::melt(matCor, na.rm=TRUE) %>%
        tibble::as_tibble()
    
    # Print the heatmap when requested
    if(isTRUE(plotMap)) {
        pHeat <- ggplot(dfCor, aes(x=Var2, y=Var1)) + 
            geom_tile(aes(fill=value)) +
            scale_fill_gradient2(NULL, low="blue", high="red", midpoint=0, mid="white", limit=c(-1, 1)) + 
            labs(x=NULL, y=NULL, title="Pearson correlation of key variables") + 
            theme(axis.text.x=element_text(angle = 90, vjust = 1, hjust = 1))
        print(pHeat)
    }
    
    # Return the tidy data when requested; otherwise fall through (invisible NULL)
    if(isTRUE(returnData)) return(dfCor)
    
}

The functional form is tested:

# Default function (cluster-ordered, upper triangle only, plot printed, no data)
makeHeatMap(allCity, vecSelect=varsTrain)

# Both triangles and return data
makeHeatMap(allCity, vecSelect=varsTrain, upperTriOnly=FALSE, returnData=TRUE)

## # A tibble: 1,296 × 3
##    Var1                          Var2                            value
##    <fct>                         <fct>                           <dbl>
##  1 soil_temperature_100_to_255cm soil_temperature_100_to_255cm  1     
##  2 doy                           soil_temperature_100_to_255cm  0.522 
##  3 dewpoint_2m                   soil_temperature_100_to_255cm  0.285 
##  4 soil_temperature_7_to_28cm    soil_temperature_100_to_255cm  0.640 
##  5 soil_temperature_28_to_100cm  soil_temperature_100_to_255cm  0.825 
##  6 soil_temperature_0_to_7cm     soil_temperature_100_to_255cm  0.516 
##  7 temperature_2m                soil_temperature_100_to_255cm  0.538 
##  8 apparent_temperature          soil_temperature_100_to_255cm  0.535 
##  9 winddirection_10m             soil_temperature_100_to_255cm -0.0725
## 10 winddirection_100m            soil_temperature_100_to_255cm -0.103 
## # ℹ 1,286 more rows
# No grouping of similar variables (original column order preserved)
makeHeatMap(allCity, vecSelect=varsTrain, groupSimilar=FALSE)

# Data only (no plot printed)
makeHeatMap(allCity, vecSelect=varsTrain, plotMap=FALSE, returnData=TRUE)
## # A tibble: 666 × 3
##    Var1                          Var2                            value
##    <fct>                         <fct>                           <dbl>
##  1 soil_temperature_100_to_255cm soil_temperature_100_to_255cm  1     
##  2 doy                           soil_temperature_100_to_255cm  0.522 
##  3 dewpoint_2m                   soil_temperature_100_to_255cm  0.285 
##  4 soil_temperature_7_to_28cm    soil_temperature_100_to_255cm  0.640 
##  5 soil_temperature_28_to_100cm  soil_temperature_100_to_255cm  0.825 
##  6 soil_temperature_0_to_7cm     soil_temperature_100_to_255cm  0.516 
##  7 temperature_2m                soil_temperature_100_to_255cm  0.538 
##  8 apparent_temperature          soil_temperature_100_to_255cm  0.535 
##  9 winddirection_10m             soil_temperature_100_to_255cm -0.0725
## 10 winddirection_100m            soil_temperature_100_to_255cm -0.103 
## # ℹ 656 more rows

Each variable is run through the random forest standalone, using a smaller training dataset:

# Variables to explore: all training variables except evapotranspiration
# itself, plus the src/tod/month features
useET0 <- c(varsTrain[!str_detect(varsTrain, "evapo")], "src", "tod", "month")
useET0
##  [1] "hour"                          "temperature_2m"               
##  [3] "relativehumidity_2m"           "dewpoint_2m"                  
##  [5] "apparent_temperature"          "pressure_msl"                 
##  [7] "surface_pressure"              "precipitation"                
##  [9] "rain"                          "snowfall"                     
## [11] "cloudcover"                    "cloudcover_low"               
## [13] "cloudcover_mid"                "cloudcover_high"              
## [15] "shortwave_radiation"           "direct_radiation"             
## [17] "direct_normal_irradiance"      "diffuse_radiation"            
## [19] "windspeed_10m"                 "windspeed_100m"               
## [21] "winddirection_10m"             "winddirection_100m"           
## [23] "windgusts_10m"                 "weathercode"                  
## [25] "vapor_pressure_deficit"        "soil_temperature_0_to_7cm"    
## [27] "soil_temperature_7_to_28cm"    "soil_temperature_28_to_100cm" 
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"       
## [31] "soil_moisture_7_to_28cm"       "soil_moisture_28_to_100cm"    
## [33] "soil_moisture_100_to_255cm"    "year"                         
## [35] "doy"                           "src"                          
## [37] "tod"                           "month"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
# Fixed seed so the 5,000-row training subsample is reproducible
set.seed(24092614)
# seq_len() is safe when nrow() could be 0 (1:nrow(df) would yield c(1, 0));
# the draws are identical to sample(1:nrow(df), ...)
idxSmallET0 <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)
# NOTE(review): mtxSmallET0 is not used by the map_dfr() single-variable runs
# below; it is re-initialized before the pairwise loop later in the file
mtxSmallET0 <- matrix(nrow=0, ncol=3)

# Map each variable to file
# One single-predictor random forest per candidate variable, trained on the
# 5,000-row subsample and scored on dfTestCloud. Each run's accuracy list
# (rfAcc) is transposed to a one-row tibble and the rows are bound together;
# .id="varNum" records the position in useET0 as a character string, which is
# mapped back to the variable name in the final mutate()
rfET0OneSmall <- map_dfr(.x=useET0, 
                         .f=function(x) runFullRF(dfTrain=dfTrainCloud[idxSmallET0,], 
                                                  yVar="et0_fao_evapotranspiration", 
                                                  xVars=x, 
                                                  dfTest=dfTestCloud, 
                                                  isContVar=TRUE,
                                                  makePlots=FALSE,
                                                  returnData=TRUE
                                                  )[["rfAcc"]] %>%
                             t() %>%
                             as_tibble(), 
                         .id="varNum"
                         ) %>%
    mutate(varName=useET0[as.numeric(varNum)])
## 
## R-squared of test data is: 57.458% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of test data is: 38.141% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of test data is: 32.273% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of test data is: -10.614% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of test data is: 27.013% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of test data is: -1.641% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -8.885% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of test data is: 1.247% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: 1.01% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: 0.362% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: 4.068% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: 3.073% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: 1.834% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -1.121% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: 84.563% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of test data is: 80.886% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of test data is: 56% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of test data is: 57.067% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of test data is: -4.527% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -7.115% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -0.486% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -2.836% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: 9.798% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: 4.918% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: 49.596% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of test data is: 35.237% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of test data is: 3.488% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: 3.094% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: -3.258% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -3.642% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -8.476% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of test data is: -8.737% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of test data is: -6.036% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: -1.33% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of test data is: 4.445% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: 4.815% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of test data is: 33.586% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of test data is: 10.331% (RMSE 0.19 vs. 0.2 null)
# Rank the standalone predictors by holdout R-squared
rfET0OneSmall %>% arrange(desc(r2))
## # A tibble: 38 × 5
##    varNum mseNull msePred    r2 varName                  
##    <chr>    <dbl>   <dbl> <dbl> <chr>                    
##  1 15      0.0392 0.00606 0.846 shortwave_radiation      
##  2 16      0.0392 0.00750 0.809 direct_radiation         
##  3 1       0.0392 0.0167  0.575 hour                     
##  4 18      0.0392 0.0168  0.571 diffuse_radiation        
##  5 17      0.0392 0.0173  0.560 direct_normal_irradiance 
##  6 25      0.0392 0.0198  0.496 vapor_pressure_deficit   
##  7 2       0.0392 0.0243  0.381 temperature_2m           
##  8 26      0.0392 0.0254  0.352 soil_temperature_0_to_7cm
##  9 37      0.0392 0.0261  0.336 tod                      
## 10 3       0.0392 0.0266  0.323 relativehumidity_2m      
## # ℹ 28 more rows

Each combination of two variables is run through the random forest, using a smaller training dataset:

# Variables to explore (same construction as for the single-variable runs)
useET0 <- c(varsTrain[!str_detect(varsTrain, "evapo")], "src", "tod", "month")
useET0
##  [1] "hour"                          "temperature_2m"               
##  [3] "relativehumidity_2m"           "dewpoint_2m"                  
##  [5] "apparent_temperature"          "pressure_msl"                 
##  [7] "surface_pressure"              "precipitation"                
##  [9] "rain"                          "snowfall"                     
## [11] "cloudcover"                    "cloudcover_low"               
## [13] "cloudcover_mid"                "cloudcover_high"              
## [15] "shortwave_radiation"           "direct_radiation"             
## [17] "direct_normal_irradiance"      "diffuse_radiation"            
## [19] "windspeed_10m"                 "windspeed_100m"               
## [21] "winddirection_10m"             "winddirection_100m"           
## [23] "windgusts_10m"                 "weathercode"                  
## [25] "vapor_pressure_deficit"        "soil_temperature_0_to_7cm"    
## [27] "soil_temperature_7_to_28cm"    "soil_temperature_28_to_100cm" 
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"       
## [31] "soil_moisture_7_to_28cm"       "soil_moisture_28_to_100cm"    
## [33] "soil_moisture_100_to_255cm"    "year"                         
## [35] "doy"                           "src"                          
## [37] "tod"                           "month"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
# New seed: this pairwise sweep uses a different 5,000-row subsample than the
# single-variable sweep above
set.seed(24092715)
# seq_len() avoids the 1:nrow(df) pitfall on an empty frame; draws are
# identical to sample(1:nrow(df), ...)
idxSmallET0 <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)
mtxSmallET0 <- matrix(nrow=0, ncol=3)

# Run each combination of variables: one two-predictor random forest for
# evapotranspiration per pair in useET0, keeping the 2022-holdout R-squared.
# Result: mtxSmallET0, one row per pair: (idx1, idx2, r2).
# Preallocate the result matrix instead of growing it with rbind() per pair.
nVarsET0 <- length(useET0)
mtxSmallET0 <- matrix(NA_real_, nrow=choose(nVarsET0, 2), ncol=3)
rowET0 <- 0L
for(idx1 in seq_len(nVarsET0 - 1L)) {
    for(idx2 in (idx1 + 1L):nVarsET0) {
        r2SmallET0 <- runFullRF(dfTrain=dfTrainCloud[idxSmallET0,] %>% mutate(weathercode=factor(weathercode)), 
                                yVar="et0_fao_evapotranspiration", 
                                xVars=useET0[c(idx1, idx2)], 
                                dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)), 
                                useLabel=keyLabel, 
                                useSub=stringr::str_to_sentence(keyLabel), 
                                isContVar=TRUE,
                                mtry=2,
                                makePlots=FALSE,
                                returnData=TRUE
                                )[["rfAcc"]][["r2"]]
        rowET0 <- rowET0 + 1L
        mtxSmallET0[rowET0, ] <- c(idx1, idx2, r2SmallET0)
    }
}
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.756% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.056% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.785% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.574% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.536% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.156% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.635% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.874% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.354% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.645% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.692% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.088% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.795% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.915% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.953% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.097% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.661% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.379% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.851% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.443% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.029% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.756% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.826% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.971% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.86% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.659% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.751% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.246% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.275% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.177% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.355% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.361% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.366% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.891% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.896% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.479% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.92% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.46% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.698% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.707% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.062% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.834% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.947% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.958% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.068% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.713% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.498% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.417% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.572% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.545% (RMSE 0.05 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.533% (RMSE 0.05 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.718% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.987% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.095% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.852% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.928% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.416% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.39% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.52% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.67% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.546% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.738% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.881% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.364% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.255% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.421% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.094% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.413% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.777% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.623% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.196% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.077% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.825% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.984% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.552% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.737% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.695% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.458% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.036% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.215% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.905% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.548% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.225% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.804% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.312% (RMSE 0.06 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.26% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.004% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.049% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.044% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.952% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.11% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.6% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.296% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.134% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.228% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.789% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.902% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.246% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.256% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.673% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.885% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.832% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.952% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.044% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.868% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.488% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.8% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.889% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.849% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.101% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.038% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.617% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.067% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.687% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.767% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.121% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.248% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.837% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.552% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.402% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.272% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.685% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.589% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.142% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.501% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.063% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.039% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.481% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.154% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.731% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.682% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.728% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.347% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.003% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.728% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.208% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.177% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.135% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.216% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.722% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.494% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.404% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.234% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.783% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.287% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.279% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.487% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.161% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.257% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.882% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.41% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.88% (RMSE 0.06 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.118% (RMSE 0.06 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.038% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.103% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.421% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.298% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.605% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.52% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.654% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.109% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.604% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.697% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.228% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.672% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.93% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.569% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.68% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.639% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.972% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.825% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.251% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.201% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.727% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.507% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.399% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.82% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.683% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.483% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.822% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.109% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.084% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.484% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.631% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.073% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.363% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.572% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.698% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.939% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.044% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.681% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.187% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.465% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.051% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.908% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.253% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.004% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.692% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.289% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.956% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.476% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.066% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.163% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.453% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.008% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.776% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.266% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.984% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.171% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.041% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.158% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.448% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.498% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.293% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.068% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.808% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.86% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.655% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.92% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.161% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.283% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.488% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.45% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.922% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.224% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.005% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.926% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.59% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.391% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.573% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.127% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.064% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.804% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.965% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.044% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.797% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.023% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.866% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.447% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.469% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.41% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.327% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.73% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.897% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.781% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.207% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.419% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.609% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.948% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.343% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.656% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.625% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.446% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.85% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.58% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.46% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.402% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.395% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.821% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.053% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.725% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.882% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.45% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.146% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.584% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.487% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.459% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.036% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.464% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.414% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.475% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.689% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.048% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.79% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.18% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.471% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.15% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.891% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.737% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.772% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.763% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.792% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.851% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.686% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.392% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.355% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.367% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.956% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.077% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.783% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.46% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.724% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.464% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.554% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.434% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.931% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.919% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.836% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.417% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.09% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.966% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.746% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.844% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.475% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.368% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.73% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.562% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.278% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.027% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.026% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.779% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.314% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.103% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.821% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.496% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.03% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.974% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.405% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.247% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.474% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.243% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.394% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.875% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.346% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.268% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.577% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.078% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.932% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.73% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.894% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.046% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.719% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.53% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.032% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.5% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.997% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.123% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.243% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.52% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.67% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.039% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.506% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.281% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.267% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.136% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.429% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.379% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.455% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.074% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.616% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.042% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.719% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.944% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.751% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.349% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.525% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.706% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.76% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.796% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.99% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.091% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.411% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.268% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.357% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.812% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.463% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.996% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.249% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.55% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.56% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.235% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.332% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.76% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.637% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.177% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.927% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.119% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.48% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.289% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.386% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.373% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.202% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.152% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.761% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.874% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.618% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.409% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.144% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.219% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.152% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.381% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.807% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.99% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.256% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.042% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.521% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.559% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.955% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.342% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.037% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.972% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.676% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.597% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.848% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.765% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.666% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.4% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.566% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.856% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.935% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.407% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.815% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.679% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.97% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.473% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.819% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.36% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.769% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.495% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.787% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.632% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.478% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.884% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.428% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.811% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.973% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.402% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.154% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.302% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.631% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.284% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.847% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.725% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.825% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.234% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.475% (RMSE 0.04 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.61% (RMSE 0.05 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.47% (RMSE 0.05 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.295% (RMSE 0.06 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.342% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.804% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.041% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.626% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.48% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.412% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.247% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.763% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.973% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.136% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.797% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.185% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.559% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.111% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.613% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.594% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.793% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.106% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.357% (RMSE 0.05 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.737% (RMSE 0.06 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.184% (RMSE 0.06 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.615% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.467% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.064% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.616% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.346% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.826% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.271% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.614% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.678% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.814% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.26% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.952% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.089% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.41% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.229% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.385% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.989% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.272% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.351% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.83% (RMSE 0.08 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.068% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.486% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.606% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.292% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.533% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.851% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.39% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.385% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.041% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.167% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.611% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.62% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.359% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.52% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.682% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.028% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.386% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.924% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.185% (RMSE 0.07 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.399% (RMSE 0.09 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.684% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.335% (RMSE 0.11 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.971% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.893% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.455% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.128% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.119% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.575% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.282% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.193% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.964% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.813% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.841% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.761% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.982% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.405% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.357% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.876% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.372% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.991% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.119% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.056% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.44% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.338% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.947% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.856% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.471% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.04% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.719% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.155% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.195% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.629% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.346% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.164% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.336% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.085% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.342% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.136% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.431% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.052% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.633% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.719% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.591% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.309% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.865% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.498% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.519% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.008% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.22% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.294% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.825% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.75% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.567% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.817% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.872% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.28% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.419% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.869% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.573% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.559% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.558% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.697% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.295% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.566% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.725% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.576% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.589% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.231% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.676% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.033% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.005% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.962% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.012% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.168% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.687% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.11% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.541% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.882% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.013% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.177% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.733% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.371% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.032% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.449% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.921% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.612% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.041% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.55% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.7% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.993% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.142% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.914% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.095% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.412% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.648% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.914% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.676% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.352% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.407% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.304% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.338% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.931% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.261% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.124% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.798% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.297% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.318% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.349% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.921% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.256% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.012% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.776% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.343% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.132% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.519% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.799% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.635% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.738% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.49% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.041% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.814% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.113% (RMSE 0.13 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.174% (RMSE 0.1 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.683% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.666% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.062% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.738% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.077% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.854% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.329% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.229% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.583% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.649% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.443% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.765% (RMSE 0.12 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.541% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.882% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.31% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.126% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.931% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.742% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.869% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.179% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.545% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.035% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.311% (RMSE 0.14 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.575% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.081% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.854% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.89% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.062% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.938% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.679% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.01% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.616% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.964% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.124% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.544% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.014% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.167% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.994% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.114% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.002% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.151% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.167% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.083% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.74% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.46% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.755% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.333% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.007% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.319% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.69% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.741% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.961% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.627% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.633% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.245% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.689% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.286% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.527% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.082% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.402% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.488% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.446% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.777% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.511% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.856% (RMSE 0.22 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.1% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.46% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.806% (RMSE 0.17 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.933% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.709% (RMSE 0.21 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.47% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.522% (RMSE 0.16 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.566% (RMSE 0.19 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.438% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.076% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.852% (RMSE 0.2 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.045% (RMSE 0.15 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.824% (RMSE 0.18 vs. 0.2 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.537% (RMSE 0.14 vs. 0.2 null)

R-squared by pairs of metrics is explored:

# Convert the pairwise-model results matrix into a tidy tibble: each row is one
# 2-predictor model, with the predictor names looked up from useET0 by index
dfSmallR2ET0 <- mtxSmallET0 %>% 
    as.data.frame() %>% 
    purrr::set_names(c("idx1", "idx2", "r2")) %>% 
    tibble::as_tibble() %>% 
    mutate(
        var1=useET0[idx1], 
        var2=useET0[idx2], 
        rn=row_number()
    )

# Show the 20 strongest predictor pairs by holdout R-squared
dfSmallR2ET0 %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 703 × 3
##    var1                 var2                             r2
##    <chr>                <chr>                         <dbl>
##  1 shortwave_radiation  vapor_pressure_deficit        0.955
##  2 temperature_2m       shortwave_radiation           0.945
##  3 shortwave_radiation  soil_temperature_0_to_7cm     0.936
##  4 temperature_2m       direct_radiation              0.925
##  5 shortwave_radiation  soil_temperature_7_to_28cm    0.925
##  6 direct_radiation     vapor_pressure_deficit        0.924
##  7 apparent_temperature shortwave_radiation           0.919
##  8 direct_radiation     soil_temperature_0_to_7cm     0.917
##  9 shortwave_radiation  soil_temperature_28_to_100cm  0.903
## 10 direct_radiation     soil_temperature_7_to_28cm    0.902
## 11 apparent_temperature direct_radiation              0.901
## 12 relativehumidity_2m  shortwave_radiation           0.893
## 13 surface_pressure     shortwave_radiation           0.891
## 14 pressure_msl         shortwave_radiation           0.886
## 15 shortwave_radiation  soil_temperature_100_to_255cm 0.883
## 16 shortwave_radiation  doy                           0.882
## 17 shortwave_radiation  month                         0.881
## 18 hour                 vapor_pressure_deficit        0.880
## 19 shortwave_radiation  soil_moisture_0_to_7cm        0.878
## 20 shortwave_radiation  src                           0.878
## # ℹ 683 more rows
# For each variable, summarize the R-squared achieved across every pairwise
# model it participated in (as either member of the pair), then plot the
# min-mean-max range per variable, ordered by mean R-squared
dfSmallR2ET0 %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(
        r2_min=min(r2), 
        r2_mu=mean(r2), 
        r2_max=max(r2)
    ) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    geom_hline(yintercept=1, lty=2, color="red") +
    lims(y=c(NA, 1)) + 
    coord_flip() + 
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting evapotranspiration", 
         y="Range of R2 (min-mean-max)", 
         x=NULL
    )

# Repeat the top-20 ranking after dropping any pair involving a radiation
# variable, to see which non-radiation predictors do best
dfSmallR2ET0 %>% 
    filter(!str_detect(var1, "radi") & !str_detect(var2, "radi")) %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 561 × 3
##    var1                      var2                            r2
##    <chr>                     <chr>                        <dbl>
##  1 hour                      vapor_pressure_deficit       0.880
##  2 hour                      temperature_2m               0.838
##  3 hour                      soil_temperature_0_to_7cm    0.829
##  4 hour                      soil_temperature_7_to_28cm   0.777
##  5 hour                      apparent_temperature         0.776
##  6 hour                      month                        0.769
##  7 vapor_pressure_deficit    tod                          0.742
##  8 hour                      doy                          0.729
##  9 hour                      soil_temperature_28_to_100cm 0.678
## 10 temperature_2m            tod                          0.671
## 11 hour                      relativehumidity_2m          0.661
## 12 hour                      weathercode                  0.658
## 13 soil_temperature_0_to_7cm tod                          0.658
## 14 hour                      src                          0.649
## 15 windgusts_10m             vapor_pressure_deficit       0.644
## 16 hour                      cloudcover_low               0.627
## 17 hour                      cloudcover                   0.626
## 18 soil_temperature_0_to_7cm soil_temperature_7_to_28cm   0.617
## 19 hour                      surface_pressure             0.612
## 20 hour                      cloudcover_mid               0.611
## # ℹ 541 more rows
# Same per-variable min-mean-max R-squared plot as above, but restricted to
# pairs that contain no radiation variable
dfSmallR2ET0 %>% 
    filter(!str_detect(var1, "radi") & !str_detect(var2, "radi")) %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(
        r2_min=min(r2), 
        r2_mu=mean(r2), 
        r2_max=max(r2)
    ) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    geom_hline(yintercept=1, lty=2, color="red") +
    lims(y=c(NA, 1)) + 
    coord_flip() + 
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting evapotranspiration (excluding radiation variables)", 
         y="Range of R2 (min-mean-max)", 
         x=NULL
    )

# Null accuracy would pick the most frequent observation
# allCity %>% count(weathercode, sort=TRUE) %>% mutate(pct=n/sum(n))

Radiation is generally the best predictor for evapotranspiration, though hour and vapor pressure deficit together also achieve an R-squared of roughly 90%.

Select combinations are explored using the full training dataset:

# Candidate predictors for the full-training-set pairwise ET0 models
possLargeET0 <- c(
    "shortwave_radiation", 
    "direct_radiation", 
    "temperature_2m", 
    "hour", 
    "vapor_pressure_deficit"
)
possLargeET0
## [1] "shortwave_radiation"    "direct_radiation"       "temperature_2m"        
## [4] "hour"                   "vapor_pressure_deficit"
# Enumerate every unordered pair of candidate predictors; combn() visits pairs
# in the same (idx1 < idx2) order as the original nested loops
pairsLargeET0 <- utils::combn(length(possLargeET0), 2L)

# Preallocate the results matrix rather than growing it with rbind() inside the
# loop (growing copies the whole matrix on every iteration); columns are
# idx1, idx2, and the holdout R-squared
mtxLargeET0 <- matrix(NA_real_, nrow=ncol(pairsLargeET0), ncol=3)

for(k in seq_len(ncol(pairsLargeET0))) {
    idx1 <- pairsLargeET0[1L, k]
    idx2 <- pairsLargeET0[2L, k]
    # Fit a 2-predictor random forest on the full training data and record the
    # holdout R-squared (the redundant dfTrainCloud[,] self-subset is dropped)
    r2LargeET0 <- runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)), 
                            yVar="et0_fao_evapotranspiration", 
                            xVars=possLargeET0[c(idx1, idx2)], 
                            dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                            useLabel=keyLabel, 
                            useSub=stringr::str_to_sentence(keyLabel), 
                            isContVar=TRUE,
                            mtry=2,
                            makePlots=FALSE,
                            returnData=TRUE
                            )[["rfAcc"]][["r2"]]
    mtxLargeET0[k, ] <- c(idx1, idx2, r2LargeET0)
}
## Growing trees.. Progress: 46%. Estimated remaining time: 36 seconds.
## Growing trees.. Progress: 86%. Estimated remaining time: 10 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.016% (RMSE 0.08 vs. 0.2 null)
## Growing trees.. Progress: 30%. Estimated remaining time: 1 minute, 10 seconds.
## Growing trees.. Progress: 62%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 96%. Estimated remaining time: 4 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.889% (RMSE 0.04 vs. 0.2 null)
## Growing trees.. Progress: 51%. Estimated remaining time: 29 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.281% (RMSE 0.07 vs. 0.2 null)
## Growing trees.. Progress: 40%. Estimated remaining time: 47 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.669% (RMSE 0.04 vs. 0.2 null)
## Growing trees.. Progress: 37%. Estimated remaining time: 52 seconds.
## Growing trees.. Progress: 79%. Estimated remaining time: 16 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.789% (RMSE 0.05 vs. 0.2 null)
## Growing trees.. Progress: 67%. Estimated remaining time: 15 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.443% (RMSE 0.08 vs. 0.2 null)
## Growing trees.. Progress: 31%. Estimated remaining time: 1 minute, 10 seconds.
## Growing trees.. Progress: 62%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 93%. Estimated remaining time: 7 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.434% (RMSE 0.05 vs. 0.2 null)
## Growing trees.. Progress: 48%. Estimated remaining time: 33 seconds.
## Growing trees.. Progress: 95%. Estimated remaining time: 3 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.423% (RMSE 0.07 vs. 0.2 null)
## Growing trees.. Progress: 29%. Estimated remaining time: 1 minute, 15 seconds.
## Growing trees.. Progress: 61%. Estimated remaining time: 39 seconds.
## Growing trees.. Progress: 88%. Estimated remaining time: 12 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.481% (RMSE 0.14 vs. 0.2 null)
## Growing trees.. Progress: 53%. Estimated remaining time: 27 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.855% (RMSE 0.06 vs. 0.2 null)
# Tidy the full-training-set results matrix into a tibble, mapping pair
# indices back to predictor names from possLargeET0
dfLargeR2ET0 <- mtxLargeET0 %>% 
    as.data.frame() %>% 
    purrr::set_names(c("idx1", "idx2", "r2")) %>% 
    tibble::as_tibble() %>% 
    mutate(
        var1=possLargeET0[idx1], 
        var2=possLargeET0[idx2], 
        rn=row_number()
    )

# Rank the predictor pairs by holdout R-squared
dfLargeR2ET0 %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 10 × 3
##    var1                var2                      r2
##    <chr>               <chr>                  <dbl>
##  1 shortwave_radiation vapor_pressure_deficit 0.957
##  2 shortwave_radiation temperature_2m         0.949
##  3 direct_radiation    temperature_2m         0.928
##  4 direct_radiation    vapor_pressure_deficit 0.924
##  5 hour                vapor_pressure_deficit 0.899
##  6 shortwave_radiation hour                   0.883
##  7 temperature_2m      hour                   0.864
##  8 shortwave_radiation direct_radiation       0.850
##  9 direct_radiation    hour                   0.844
## 10 temperature_2m      vapor_pressure_deficit 0.525

A model is run to predict relative humidity, at first allowing all predictors:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest for relative humidity using every training variable except the
# humidity outcome itself (anything matching "humidity_2m" at end of name),
# plus city (src), month, and time-of-day (tod) predictors.
# Train: pre-2022 training split; test: 2022 holdout split.
# returnData=TRUE keeps the fitted model/accuracy list for later inspection.
rfRHFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
                      yVar="relativehumidity_2m", 
                      xVars=c(varsTrain[!str_detect(varsTrain, "humidity_2m$")], "src", "month", "tod"), 
                      dfTest=allCity %>% filter(tt=="test", year==2022), 
                      useLabel=keyLabel, 
                      useSub=stringr::str_to_sentence(keyLabel), 
                      isContVar=TRUE,
                      rndTo=-1L,
                      refXY=TRUE,
                      returnData=TRUE
                      )
## Growing trees.. Progress: 14%. Estimated remaining time: 3 minutes, 7 seconds.
## Growing trees.. Progress: 27%. Estimated remaining time: 2 minutes, 51 seconds.
## Growing trees.. Progress: 39%. Estimated remaining time: 2 minutes, 27 seconds.
## Growing trees.. Progress: 52%. Estimated remaining time: 1 minute, 58 seconds.
## Growing trees.. Progress: 65%. Estimated remaining time: 1 minute, 25 seconds.
## Growing trees.. Progress: 77%. Estimated remaining time: 56 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 22 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.502% (RMSE 1.86 vs. 26.4 null)
## `geom_smooth()` using formula = 'y ~ x'

The model is highly effective at predicting relative humidity, primarily by using vapor pressure deficit. There is a mathematical formula that maps T and D to either RH or VPD, and it is notable that the initial model places more emphasis on the derived predictor (VPD) than on the two base predictors (T and D).

The linear model is run for relative humidity, using all predictors:

# Eliminate diffuse radiation due to rank-deficiency
lmRHFull <- lm(relativehumidity_2m ~ ., 
               data=allCity %>% 
                   filter(tt=="train", year<2022) %>% 
                   mutate(weathercode=factor(weathercode)) %>%
                   select(all_of(varsTrain)) %>% 
                   select(-diffuse_radiation)
               )
summary(lmRHFull)
## 
## Call:
## lm(formula = relativehumidity_2m ~ ., data = allCity %>% filter(tt == 
##     "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>% 
##     select(all_of(varsTrain)) %>% select(-diffuse_radiation))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.260  -4.302  -0.479   3.717  53.931 
## 
## Coefficients:
##                                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                   -1.030e+02  6.583e+00  -15.639  < 2e-16 ***
## hour                          -2.167e-01  1.690e-03 -128.207  < 2e-16 ***
## temperature_2m                -4.008e+00  1.509e-02 -265.635  < 2e-16 ***
## dewpoint_2m                    2.350e+00  3.878e-03  606.146  < 2e-16 ***
## apparent_temperature           1.350e+00  1.366e-02   98.811  < 2e-16 ***
## pressure_msl                  -1.229e-01  2.445e-03  -50.279  < 2e-16 ***
## surface_pressure              -9.963e-03  1.093e-03   -9.119  < 2e-16 ***
## precipitation                  2.126e+01  2.828e+00    7.517 5.63e-14 ***
## rain                          -2.109e+01  2.830e+00   -7.454 9.10e-14 ***
## snowfall                      -2.824e+01  4.098e+00   -6.891 5.54e-12 ***
## cloudcover                     2.290e-02  1.797e-03   12.747  < 2e-16 ***
## cloudcover_low                 5.843e-02  9.225e-04   63.342  < 2e-16 ***
## cloudcover_mid                -2.213e-02  6.919e-04  -31.984  < 2e-16 ***
## cloudcover_high                5.343e-04  4.384e-04    1.219    0.223    
## shortwave_radiation           -1.934e-02  2.537e-04  -76.225  < 2e-16 ***
## direct_radiation               2.090e-02  2.710e-04   77.119  < 2e-16 ***
## direct_normal_irradiance      -2.559e-03  1.013e-04  -25.262  < 2e-16 ***
## windspeed_10m                  1.122e-01  6.600e-03   17.003  < 2e-16 ***
## windspeed_100m                 1.793e-01  4.138e-03   43.335  < 2e-16 ***
## winddirection_10m             -7.227e-03  1.672e-04  -43.230  < 2e-16 ***
## winddirection_100m            -1.037e-03  1.690e-04   -6.133 8.63e-10 ***
## windgusts_10m                 -1.956e-01  2.211e-03  -88.487  < 2e-16 ***
## et0_fao_evapotranspiration    -1.734e+01  2.948e-01  -58.839  < 2e-16 ***
## weathercode1                  -2.663e-01  4.403e-02   -6.049 1.46e-09 ***
## weathercode2                  -9.977e-02  7.916e-02   -1.260    0.208    
## weathercode3                  -8.681e-01  1.042e-01   -8.328  < 2e-16 ***
## weathercode51                  1.843e+00  9.356e-02   19.696  < 2e-16 ***
## weathercode53                  3.157e+00  1.239e-01   25.487  < 2e-16 ***
## weathercode55                  3.289e+00  1.755e-01   18.742  < 2e-16 ***
## weathercode61                  3.318e+00  1.690e-01   19.631  < 2e-16 ***
## weathercode63                  3.052e+00  2.781e-01   10.972  < 2e-16 ***
## weathercode65                  9.820e-01  7.615e-01    1.289    0.197    
## weathercode71                  1.040e+00  1.883e-01    5.526 3.28e-08 ***
## weathercode73                  2.569e+00  2.728e-01    9.416  < 2e-16 ***
## weathercode75                  3.224e+00  7.604e-01    4.240 2.24e-05 ***
## vapor_pressure_deficit         3.095e+00  3.181e-02   97.298  < 2e-16 ***
## soil_temperature_0_to_7cm      7.778e-01  4.730e-03  164.442  < 2e-16 ***
## soil_temperature_7_to_28cm    -6.394e-01  8.186e-03  -78.111  < 2e-16 ***
## soil_temperature_28_to_100cm   2.015e-01  9.113e-03   22.111  < 2e-16 ***
## soil_temperature_100_to_255cm -3.332e-01  4.667e-03  -71.402  < 2e-16 ***
## soil_moisture_0_to_7cm         2.679e+01  2.792e-01   95.952  < 2e-16 ***
## soil_moisture_7_to_28cm       -6.394e+00  4.021e-01  -15.904  < 2e-16 ***
## soil_moisture_28_to_100cm     -1.117e-01  3.028e-01   -0.369    0.712    
## soil_moisture_100_to_255cm    -1.571e+01  2.966e-01  -52.959  < 2e-16 ***
## year                           1.641e-01  3.163e-03   51.876  < 2e-16 ***
## doy                            5.508e-03  1.264e-04   43.582  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.193 on 368064 degrees of freedom
## Multiple R-squared:  0.9435, Adjusted R-squared:  0.9434 
## F-statistic: 1.365e+05 on 45 and 368064 DF,  p-value: < 2.2e-16
# Holdout evaluation of the full linear model on the 2022 test split:
# model MSE, null-model MSE, pseudo R-squared, and RMSE.
# weathercode must be factored BEFORE predict() so its levels match the fit.
allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)) %>%
    mutate(pred = predict(lmRHFull, newdata = .)) %>%
    summarize(meModel = mean((pred - relativehumidity_2m)^2),
              meBase = mean((relativehumidity_2m - mean(relativehumidity_2m))^2),
              r2 = 1 - meModel / meBase,
              rmse = sqrt(meModel))
## # A tibble: 1 × 4
##   meModel meBase    r2  rmse
##     <dbl>  <dbl> <dbl> <dbl>
## 1    45.7   697. 0.934  6.76
# Rank the full linear model's coefficients by the magnitude of their t value
summary(lmRHFull)[["coefficients"]] %>%
    as.data.frame() %>%
    rownames_to_column("Variable") %>%
    tibble::as_tibble() %>%
    arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
##    Variable                   Estimate `Std. Error` `t value` `Pr(>|t|)`
##    <chr>                         <dbl>        <dbl>     <dbl>      <dbl>
##  1 dewpoint_2m                  2.35       0.00388      606.           0
##  2 temperature_2m              -4.01       0.0151      -266.           0
##  3 soil_temperature_0_to_7cm    0.778      0.00473      164.           0
##  4 hour                        -0.217      0.00169     -128.           0
##  5 apparent_temperature         1.35       0.0137        98.8          0
##  6 vapor_pressure_deficit       3.10       0.0318        97.3          0
##  7 soil_moisture_0_to_7cm      26.8        0.279         96.0          0
##  8 windgusts_10m               -0.196      0.00221      -88.5          0
##  9 soil_temperature_7_to_28cm  -0.639      0.00819      -78.1          0
## 10 direct_radiation             0.0209     0.000271      77.1          0
## # ℹ 36 more rows

The linear model prefers dewpoint and temperature to VPD as predictors for relative humidity. The linear model yields a lower R-squared and a higher RMSE than the random forest (RMSE ~1.9 for the random forest vs. ~6.8 for the linear model).

Correlations between predictors and relative humidity are assessed:

# Correlation of each candidate predictor with relative humidity, plotted as a
# horizontal bar chart. vapply() replaces sapply() so the result is guaranteed
# to be a named numeric vector regardless of input.
vapply(varsTrain, 
       FUN=function(x) cor(allCity$relativehumidity_2m, allCity[[x]]), 
       FUN.VALUE=numeric(1)) %>% 
    as.data.frame() %>% 
    rownames_to_column("var") %>% 
    tibble::as_tibble() %>% 
    purrr::set_names(c("var", "cor")) %>% 
    ggplot(aes(x=fct_reorder(var, cor), y=cor)) + 
    geom_col(fill="lightblue") + 
    # Label only the stronger correlations so the chart stays readable
    geom_text(data=~filter(., abs(cor)>0.2), aes(y=cor/2, label=round(cor, 2)), size=2.5) +
    coord_flip() + 
    labs(title="Correlation with relative humidity", 
         y="Correlation", 
         x=NULL
         ) + 
    lims(y=c(NA, 1))

# Bucket relative humidity and four candidate predictors to coarse grids,
# count occurrences per bucket, then plot RH against each predictor with a
# count-weighted linear smooth (one facet per predictor).
allCity %>%
    select(relativehumidity_2m,
           et0_fao_evapotranspiration,
           surface_pressure,
           vapor_pressure_deficit,
           soil_moisture_0_to_7cm) %>%
    mutate(across(c(relativehumidity_2m, surface_pressure), ~ round(.x, 0)),
           across(et0_fao_evapotranspiration, ~ round(20 * .x) / 20),
           across(vapor_pressure_deficit, ~ round(4 * .x) / 4),
           across(soil_moisture_0_to_7cm, ~ round(100 * .x) / 100),
           rn = row_number()) %>%
    pivot_longer(cols = -c(rn, relativehumidity_2m)) %>%
    count(relativehumidity_2m, name, value) %>%
    ggplot(aes(x = value, y = relativehumidity_2m)) +
    geom_point(aes(size = n), alpha = 0.5) +
    geom_smooth(aes(weight = n), method = "lm") +
    facet_wrap(~name, scales = "free_x") +
    labs(x = NULL, title = "Relative humidity vs. four potentially strong predictors")
## `geom_smooth()` using formula = 'y ~ x'

The correlations are further explored by city:

# By-city view of the same four predictors: bucket values, count occurrences,
# then overlay a per-city weighted lm smooth (colored) and an overall weighted
# lm smooth (black, dashed) to expose Simpson's-paradox-style reversals.
allCity %>%
    select(src, 
           relativehumidity_2m, 
           et0_fao_evapotranspiration, 
           surface_pressure, 
           vapor_pressure_deficit, 
           soil_moisture_0_to_7cm
           ) %>%
    mutate(across(.cols=c(relativehumidity_2m, surface_pressure), .fns=function(x) round(x, 0)),
           across(.cols=c(et0_fao_evapotranspiration), .fns=function(x) round(20*x)/20),
           across(.cols=c(vapor_pressure_deficit), .fns=function(x) round(4*x)/4),
           across(.cols=c(soil_moisture_0_to_7cm), .fns=function(x) round(100*x)/100),
           rn=row_number()
           ) %>%
    pivot_longer(cols=-c(rn, src, relativehumidity_2m)) %>%
    count(src, relativehumidity_2m, name, value) %>%
    ggplot(aes(x=value, y=relativehumidity_2m)) + 
    geom_smooth(aes(weight=n, color=src), method="lm") +
    geom_smooth(aes(weight=n), method="lm", lty=2, color="black") +
    facet_wrap(~name, scales="free_x") + 
    labs(x=NULL, 
         title="Relative humidity vs. four potentially strong predictors", 
         # Fixed: subtitle previously had an unbalanced opening parenthesis
         subtitle="Best lm fit (dashed line is overall, colored lines are by city)"
         ) + 
    scale_color_discrete(NULL)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Surface pressure appears to exhibit Simpson’s paradox, with an overall increasing relationship to relative humidity but a generally decreasing relationship when controlled for city. The other plotted predictors show similar trends both within individual cities and overall.

A model using surface pressure only is run, then a model that adds city:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Single-predictor random forest: relative humidity from surface pressure
# only. mtry=1 because there is exactly one candidate variable per split.
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
          yVar="relativehumidity_2m", 
          xVars=c("surface_pressure"), 
          dfTest=allCity %>% filter(tt=="test", year==2022), 
          mtry=1,
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=TRUE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.659% (RMSE 19.99 vs. 26.4 null)
## `geom_smooth()` using formula = 'y ~ x'

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Two-predictor random forest: surface pressure plus city (src), to test
# whether conditioning on city resolves the Simpson's-paradox pattern seen in
# the by-city plots. mtry=2 so both variables are candidates at each split.
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
          yVar="relativehumidity_2m", 
          xVars=c("surface_pressure", "src"), 
          dfTest=allCity %>% filter(tt=="test", year==2022), 
          mtry=2,
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=TRUE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )
## Growing trees.. Progress: 77%. Estimated remaining time: 9 seconds.

## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.232% (RMSE 19.36 vs. 26.4 null)
## `geom_smooth()` using formula = 'y ~ x'

Adding city to the surface pressure model only modestly improves predictive power

The linear model is re-run for relative humidity, using only surface pressure and city:

# Eliminate diffuse radiation due to rank-deficiency
# Linear counterpart to the two-predictor random forest: per-city intercepts
# (src) plus per-city surface-pressure slopes (src:surface_pressure), fit on
# the pre-2022 training rows only.
lmRHTwo <- lm(relativehumidity_2m ~ src + src:surface_pressure, 
              data=allCity %>% 
                  filter(tt=="train", year<2022) %>% 
                  mutate(weathercode=factor(weathercode)) %>%
                  select(all_of(varsTrain), src) %>% 
                  select(-diffuse_radiation)
              )
summary(lmRHTwo)
## 
## Call:
## lm(formula = relativehumidity_2m ~ src + src:surface_pressure, 
##     data = allCity %>% filter(tt == "train", year < 2022) %>% 
##         mutate(weathercode = factor(weathercode)) %>% select(all_of(varsTrain), 
##         src) %>% select(-diffuse_radiation))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -69.11 -14.30   0.37  14.49  72.00 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  564.55509   10.02796  56.298  < 2e-16 ***
## srcHouston                   205.50657   16.42320  12.513  < 2e-16 ***
## srcLA                       1055.67031   22.23287  47.482  < 2e-16 ***
## srcNYC                      -100.70531   13.65868  -7.373 1.67e-13 ***
## srcVegas                    -622.35837   16.40974 -37.926  < 2e-16 ***
## srcChicago:surface_pressure   -0.49419    0.01008 -49.046  < 2e-16 ***
## srcHouston:surface_pressure   -0.68528    0.01282 -53.455  < 2e-16 ***
## srcLA:surface_pressure        -1.60176    0.02033 -78.781  < 2e-16 ***
## srcNYC:surface_pressure       -0.38764    0.00916 -42.317  < 2e-16 ***
## srcVegas:surface_pressure      0.09303    0.01389   6.700 2.09e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.28 on 368100 degrees of freedom
## Multiple R-squared:  0.4519, Adjusted R-squared:  0.4519 
## F-statistic: 3.372e+04 on 9 and 368100 DF,  p-value: < 2.2e-16
# Holdout evaluation of the two-predictor linear model on the 2022 test split:
# model MSE, null-model MSE, pseudo R-squared, and RMSE.
allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)) %>%
    mutate(pred = predict(lmRHTwo, newdata = .)) %>%
    summarize(meModel = mean((pred - relativehumidity_2m)^2),
              meBase = mean((relativehumidity_2m - mean(relativehumidity_2m))^2),
              r2 = 1 - meModel / meBase,
              rmse = sqrt(meModel))
## # A tibble: 1 × 4
##   meModel meBase    r2  rmse
##     <dbl>  <dbl> <dbl> <dbl>
## 1    375.   697. 0.462  19.4
# summary(lmRHFull)$coefficients %>% 
#     as.data.frame() %>% 
#     rownames_to_column("Variable") %>% 
#     tibble::as_tibble() %>% 
#     arrange(desc(abs(`t value`)))

Results are similar to those from the random forest model

Each combination of two variables is run through the random forest, using a smaller training dataset:

# Variables to explore: everything in varsTrain except the outcome itself,
# plus city (src), time-of-day (tod), and month
useRH <- c(str_subset(varsTrain, "relativehumidity_2m", negate=TRUE), "src", "tod", "month")
useRH
##  [1] "hour"                          "temperature_2m"               
##  [3] "dewpoint_2m"                   "apparent_temperature"         
##  [5] "pressure_msl"                  "surface_pressure"             
##  [7] "precipitation"                 "rain"                         
##  [9] "snowfall"                      "cloudcover"                   
## [11] "cloudcover_low"                "cloudcover_mid"               
## [13] "cloudcover_high"               "shortwave_radiation"          
## [15] "direct_radiation"              "direct_normal_irradiance"     
## [17] "diffuse_radiation"             "windspeed_10m"                
## [19] "windspeed_100m"                "winddirection_10m"            
## [21] "winddirection_100m"            "windgusts_10m"                
## [23] "et0_fao_evapotranspiration"    "weathercode"                  
## [25] "vapor_pressure_deficit"        "soil_temperature_0_to_7cm"    
## [27] "soil_temperature_7_to_28cm"    "soil_temperature_28_to_100cm" 
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"       
## [31] "soil_moisture_7_to_28cm"       "soil_moisture_28_to_100cm"    
## [33] "soil_moisture_100_to_255cm"    "year"                         
## [35] "doy"                           "src"                          
## [37] "tod"                           "month"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
set.seed(24100616)
# 5000-row training subsample keeps each pairwise fit fast
idxSmallRH <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)

# Preallocate the results matrix (one row per unordered variable pair) instead
# of growing it with rbind() inside the loop, which copies the whole matrix on
# every iteration (O(n^2) in total work). Columns: idx1, idx2, holdout r2.
nPairsRH <- choose(length(useRH), 2)
mtxSmallRH <- matrix(NA_real_, nrow=nPairsRH, ncol=3)

# Run each combination of variables through a small random forest and record
# the 2022-holdout R-squared for the pair
rowSmallRH <- 0L
for(idx1 in seq_len(length(useRH) - 1)) {
    for(idx2 in (idx1 + 1):length(useRH)) {
        r2SmallRH <- runFullRF(dfTrain=dfTrainCloud[idxSmallRH,] %>% mutate(weathercode=factor(weathercode)), 
                               yVar="relativehumidity_2m", 
                               xVars=useRH[c(idx1, idx2)], 
                               dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)), 
                               useLabel=keyLabel, 
                               useSub=stringr::str_to_sentence(keyLabel), 
                               isContVar=TRUE,
                               mtry=2,
                               makePlots=FALSE,
                               returnData=TRUE
                               )[["rfAcc"]][["r2"]]
        rowSmallRH <- rowSmallRH + 1L
        mtxSmallRH[rowSmallRH, ] <- c(idx1, idx2, r2SmallRH)
    }
}
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.774% (RMSE 25.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.225% (RMSE 22.05 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.263% (RMSE 26.82 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.833% (RMSE 26.64 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.754% (RMSE 19.44 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.259% (RMSE 23.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.847% (RMSE 23.19 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.44% (RMSE 24.56 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.423% (RMSE 22.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.06% (RMSE 20.27 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.215% (RMSE 23.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.795% (RMSE 25.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.501% (RMSE 24.41 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.747% (RMSE 23.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.396% (RMSE 23.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.968% (RMSE 25.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.59% (RMSE 27.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.894% (RMSE 27.04 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.862% (RMSE 26.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.411% (RMSE 26.97 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.133% (RMSE 27.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.948% (RMSE 18.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.398% (RMSE 21.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.294% (RMSE 15.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.116% (RMSE 25.71 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.767% (RMSE 26.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.829% (RMSE 27.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.35% (RMSE 27.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.028% (RMSE 19.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.618% (RMSE 21.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.357% (RMSE 23.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.96% (RMSE 19.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.463% (RMSE 25.53 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.494% (RMSE 27.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.005% (RMSE 17.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.955% (RMSE 24.63 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.659% (RMSE 25.37 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.842% (RMSE 1.05 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.775% (RMSE 14.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.966% (RMSE 25.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.277% (RMSE 18.61 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.413% (RMSE 24.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.311% (RMSE 24.44 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.248% (RMSE 25.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.241% (RMSE 23.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.734% (RMSE 21.97 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.207% (RMSE 24.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.869% (RMSE 25.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.333% (RMSE 24.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.309% (RMSE 23.86 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.816% (RMSE 23.19 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.541% (RMSE 24.97 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.058% (RMSE 26.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.076% (RMSE 26.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.165% (RMSE 25.84 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.469% (RMSE 25.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.33% (RMSE 26.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.994% (RMSE 20.62 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.789% (RMSE 23.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.816% (RMSE 1.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.969% (RMSE 25.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.483% (RMSE 25.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.078% (RMSE 26.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.439% (RMSE 26.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.894% (RMSE 20.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.195% (RMSE 21.41 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.896% (RMSE 22.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.531% (RMSE 20.86 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.199% (RMSE 27.71 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.141% (RMSE 24.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.983% (RMSE 19.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.97% (RMSE 25.6 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.848% (RMSE 24.92 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.113% (RMSE 4.48 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.67% (RMSE 22.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.107% (RMSE 18.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.219% (RMSE 22.52 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.851% (RMSE 22.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.24% (RMSE 23.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.41% (RMSE 21.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.867% (RMSE 20.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.654% (RMSE 22.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.392% (RMSE 23.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.215% (RMSE 20.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.001% (RMSE 19.75 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.043% (RMSE 19.92 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.284% (RMSE 21.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.095% (RMSE 23.6 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.561% (RMSE 23.08 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.77% (RMSE 23.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.746% (RMSE 23.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.351% (RMSE 23.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.136% (RMSE 14.9 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.145% (RMSE 21.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.931% (RMSE 0.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.857% (RMSE 8.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.615% (RMSE 14.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.591% (RMSE 16.78 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.139% (RMSE 20.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.725% (RMSE 17.96 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.252% (RMSE 18.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.602% (RMSE 21.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.459% (RMSE 19.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.434% (RMSE 25.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.776% (RMSE 19.44 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.025% (RMSE 18.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.695% (RMSE 23.06 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.372% (RMSE 20.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.454% (RMSE 26.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.17% (RMSE 19.72 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.889% (RMSE 26.01 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.852% (RMSE 26.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.618% (RMSE 27.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.456% (RMSE 24.56 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.349% (RMSE 23.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.42% (RMSE 26.71 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.058% (RMSE 27.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.44% (RMSE 25.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.127% (RMSE 25.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.256% (RMSE 24.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.888% (RMSE 26.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.356% (RMSE 28.1 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.823% (RMSE 27.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.386% (RMSE 27.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.339% (RMSE 27.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.584% (RMSE 28.01 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.232% (RMSE 20.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.042% (RMSE 25.04 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.057% (RMSE 2.56 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.369% (RMSE 23.41 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.742% (RMSE 26.89 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.22% (RMSE 27.33 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.444% (RMSE 28.36 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.178% (RMSE 20.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.139% (RMSE 22.53 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.099% (RMSE 24.18 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.887% (RMSE 21.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.288% (RMSE 29.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.922% (RMSE 27.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.195% (RMSE 21.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.379% (RMSE 27.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.547% (RMSE 28.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.825% (RMSE 19.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.889% (RMSE 25.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.017% (RMSE 25.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.352% (RMSE 26.84 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.685% (RMSE 24.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.879% (RMSE 22.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.637% (RMSE 26.05 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.095% (RMSE 27.7 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.681% (RMSE 24.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.928% (RMSE 24.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.942% (RMSE 23.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.665% (RMSE 25.64 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.898% (RMSE 27.92 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.813% (RMSE 27.41 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.737% (RMSE 27.9 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.099% (RMSE 27.82 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.327% (RMSE 28.1 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.438% (RMSE 20.71 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.117% (RMSE 24.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.073% (RMSE 14.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.214% (RMSE 25.43 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.234% (RMSE 26.1 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.336% (RMSE 26.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.611% (RMSE 27.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.41% (RMSE 21.05 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.63% (RMSE 22.92 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.178% (RMSE 24.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.787% (RMSE 22.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.381% (RMSE 29.68 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.489% (RMSE 26.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.671% (RMSE 21.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.371% (RMSE 26.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.895% (RMSE 27.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.114% (RMSE 20.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.913% (RMSE 20.8 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.668% (RMSE 21.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.011% (RMSE 19.75 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.577% (RMSE 19.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.249% (RMSE 21.08 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.067% (RMSE 22.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.888% (RMSE 18.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.532% (RMSE 18.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.858% (RMSE 18.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.971% (RMSE 19.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.256% (RMSE 20.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.722% (RMSE 21.33 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.308% (RMSE 21.23 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.304% (RMSE 20.9 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.581% (RMSE 20.18 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.806% (RMSE 15.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.648% (RMSE 20.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.501% (RMSE 11.95 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.999% (RMSE 19.04 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.061% (RMSE 20.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.126% (RMSE 21.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.168% (RMSE 21.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.188% (RMSE 20.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.109% (RMSE 21.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.846% (RMSE 22.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.667% (RMSE 21.82 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.909% (RMSE 22.57 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.737% (RMSE 21.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.672% (RMSE 21.34 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.616% (RMSE 21.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.93% (RMSE 21.78 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.265% (RMSE 25.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.27% (RMSE 25.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.156% (RMSE 23.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.141% (RMSE 22.06 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.215% (RMSE 24.87 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.847% (RMSE 25.34 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.77% (RMSE 24.08 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.063% (RMSE 23.75 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.706% (RMSE 23.8 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.135% (RMSE 24.03 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.029% (RMSE 26.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.013% (RMSE 26.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.853% (RMSE 26.15 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.241% (RMSE 26.23 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.487% (RMSE 25.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.212% (RMSE 18.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.663% (RMSE 23.36 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.398% (RMSE 14.84 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.607% (RMSE 24.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.05% (RMSE 25.45 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.289% (RMSE 26.23 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.663% (RMSE 25.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.706% (RMSE 20.83 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.343% (RMSE 22.19 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.187% (RMSE 24.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.57% (RMSE 20.52 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.987% (RMSE 25.18 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.269% (RMSE 26.43 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.464% (RMSE 19.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.179% (RMSE 24.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.196% (RMSE 25.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.269% (RMSE 25.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.294% (RMSE 23.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.236% (RMSE 22.05 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.127% (RMSE 24.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.528% (RMSE 25.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.731% (RMSE 24.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.095% (RMSE 23.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.884% (RMSE 23.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.926% (RMSE 24.06 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.477% (RMSE 26.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.534% (RMSE 26.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.519% (RMSE 26.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.824% (RMSE 26.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.616% (RMSE 25.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.342% (RMSE 18.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.666% (RMSE 23.36 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.609% (RMSE 14.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.669% (RMSE 24.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.942% (RMSE 25.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.125% (RMSE 26.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.747% (RMSE 25.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.621% (RMSE 20.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.322% (RMSE 22.19 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.685% (RMSE 24.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.691% (RMSE 20.5 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.316% (RMSE 25.28 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.542% (RMSE 26.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.473% (RMSE 19.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.737% (RMSE 24.23 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.704% (RMSE 25.36 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.677% (RMSE 23.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.13% (RMSE 22.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.5% (RMSE 25.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.567% (RMSE 26.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.225% (RMSE 25.15 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.777% (RMSE 24.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.831% (RMSE 24.5 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.859% (RMSE 25.34 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.358% (RMSE 27.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.13% (RMSE 27.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.138% (RMSE 27.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.225% (RMSE 27.33 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.174% (RMSE 26.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.598% (RMSE 19.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.759% (RMSE 23.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.811% (RMSE 15.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.408% (RMSE 25.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.125% (RMSE 26.54 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.905% (RMSE 27.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.378% (RMSE 27.1 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.089% (RMSE 21.27 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.728% (RMSE 22.9 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.425% (RMSE 24.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.538% (RMSE 21.36 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.859% (RMSE 26.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.994% (RMSE 27.68 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.264% (RMSE 20.06 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.15% (RMSE 25.57 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.382% (RMSE 26.45 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.42% (RMSE 22.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.836% (RMSE 23.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.51% (RMSE 23.53 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.668% (RMSE 22.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.666% (RMSE 21.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.296% (RMSE 22.04 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.714% (RMSE 22.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.663% (RMSE 24.95 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.583% (RMSE 25.24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.406% (RMSE 25.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.906% (RMSE 25.33 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.263% (RMSE 24.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.183% (RMSE 17.67 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.298% (RMSE 23.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.626% (RMSE 14.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.499% (RMSE 23.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.541% (RMSE 24.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.461% (RMSE 24.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.301% (RMSE 24.86 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.515% (RMSE 20.36 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.843% (RMSE 21.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.548% (RMSE 22.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.449% (RMSE 20.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.755% (RMSE 25.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.548% (RMSE 25.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.284% (RMSE 18.61 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.688% (RMSE 22.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.122% (RMSE 24.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.744% (RMSE 22.44 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.761% (RMSE 22.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.546% (RMSE 20.86 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.949% (RMSE 20.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.545% (RMSE 21.03 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.05% (RMSE 20.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.469% (RMSE 23.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.581% (RMSE 23.67 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.399% (RMSE 23.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.426% (RMSE 23.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.736% (RMSE 22.44 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.023% (RMSE 16.05 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.002% (RMSE 21.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.556% (RMSE 14.08 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.31% (RMSE 22.04 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.201% (RMSE 23.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.026% (RMSE 23.01 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.148% (RMSE 22.68 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.871% (RMSE 19.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.2% (RMSE 20.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.469% (RMSE 22.33 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.698% (RMSE 19.27 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.26% (RMSE 22.82 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.614% (RMSE 23.96 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.952% (RMSE 17.72 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.712% (RMSE 21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.564% (RMSE 22.62 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.793% (RMSE 26.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.472% (RMSE 24.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.758% (RMSE 23.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.51% (RMSE 23.68 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.048% (RMSE 24.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.74% (RMSE 26.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.143% (RMSE 27.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.521% (RMSE 27.24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.941% (RMSE 27.04 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.152% (RMSE 26.11 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.104% (RMSE 18.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.995% (RMSE 23.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.009% (RMSE 14.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.189% (RMSE 24.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.089% (RMSE 26.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.128% (RMSE 27.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.363% (RMSE 26.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.939% (RMSE 21.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.507% (RMSE 22.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.414% (RMSE 24.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.527% (RMSE 21.52 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.018% (RMSE 26.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.653% (RMSE 27.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.629% (RMSE 19.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.931% (RMSE 24.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.548% (RMSE 26.06 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.052% (RMSE 25.03 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.696% (RMSE 24.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.264% (RMSE 24.3 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.687% (RMSE 25.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.481% (RMSE 28.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.547% (RMSE 28 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.918% (RMSE 28.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.76% (RMSE 28.28 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.347% (RMSE 27.6 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.82% (RMSE 19.96 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.363% (RMSE 24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.85% (RMSE 15.43 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.437% (RMSE 25.8 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.136% (RMSE 27.19 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.187% (RMSE 27.83 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.152% (RMSE 27.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.735% (RMSE 21.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.177% (RMSE 23.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.441% (RMSE 25.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.564% (RMSE 22.15 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.262% (RMSE 27.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.591% (RMSE 28.87 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.014% (RMSE 20.61 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.083% (RMSE 25.72 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.927% (RMSE 27.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.392% (RMSE 23.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.234% (RMSE 23.72 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.673% (RMSE 23.95 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.713% (RMSE 25.63 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.281% (RMSE 25.96 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.781% (RMSE 25.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.221% (RMSE 25.15 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.098% (RMSE 25.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.314% (RMSE 17.84 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.418% (RMSE 22.8 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.161% (RMSE 15.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.584% (RMSE 24.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.785% (RMSE 24.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.79% (RMSE 25.89 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.894% (RMSE 25.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.553% (RMSE 18.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.553% (RMSE 20.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.761% (RMSE 22.28 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.172% (RMSE 19.19 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.123% (RMSE 25.71 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.914% (RMSE 25.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.609% (RMSE 18.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.579% (RMSE 25.24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.79% (RMSE 24.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.985% (RMSE 23.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.925% (RMSE 23.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.024% (RMSE 25.04 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.156% (RMSE 25.43 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.552% (RMSE 24.68 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.769% (RMSE 24.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.754% (RMSE 24.8 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.852% (RMSE 18.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.187% (RMSE 22.68 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.375% (RMSE 15.08 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.808% (RMSE 24.08 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.661% (RMSE 24.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.523% (RMSE 25.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.065% (RMSE 24.75 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.476% (RMSE 18.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.27% (RMSE 20.4 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.071% (RMSE 22.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.589% (RMSE 18.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.122% (RMSE 25.44 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.377% (RMSE 25.27 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.066% (RMSE 17.89 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.3% (RMSE 24.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.545% (RMSE 24.54 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.544% (RMSE 23.82 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.182% (RMSE 24.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.468% (RMSE 24.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.142% (RMSE 24.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.122% (RMSE 24.18 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.216% (RMSE 24.45 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.106% (RMSE 18.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.643% (RMSE 22.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.908% (RMSE 14.72 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.675% (RMSE 23.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.629% (RMSE 23.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.155% (RMSE 24.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.384% (RMSE 24.28 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.622% (RMSE 18.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.978% (RMSE 19.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.539% (RMSE 21.84 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.83% (RMSE 18.7 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.291% (RMSE 24.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.589% (RMSE 24.96 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.481% (RMSE 17.61 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.67% (RMSE 24.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.608% (RMSE 24.11 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.789% (RMSE 26.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.472% (RMSE 26.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.692% (RMSE 26.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.216% (RMSE 26.24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.87% (RMSE 26.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.377% (RMSE 17.23 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.485% (RMSE 22.63 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.303% (RMSE 15.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.479% (RMSE 25.11 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.394% (RMSE 25.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.801% (RMSE 26.63 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.358% (RMSE 26.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.99% (RMSE 19.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.644% (RMSE 21.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.747% (RMSE 23.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.334% (RMSE 20.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.571% (RMSE 26.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.533% (RMSE 26.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.202% (RMSE 19.18 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.017% (RMSE 25.45 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.249% (RMSE 26.23 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.329% (RMSE 26.96 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.121% (RMSE 28.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.974% (RMSE 28.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.975% (RMSE 28.43 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.785% (RMSE 20.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.086% (RMSE 24.75 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.65% (RMSE 15.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.471% (RMSE 26.33 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.954% (RMSE 27.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.134% (RMSE 28.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.414% (RMSE 28.36 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.499% (RMSE 20.7 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.088% (RMSE 23.15 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.715% (RMSE 25.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.412% (RMSE 21.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -32.755% (RMSE 30.41 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.902% (RMSE 28.78 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.864% (RMSE 20.3 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.703% (RMSE 27.27 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.185% (RMSE 29.53 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.634% (RMSE 28.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.555% (RMSE 28.62 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.578% (RMSE 27.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.157% (RMSE 20.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.147% (RMSE 25.44 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.846% (RMSE 15.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.36% (RMSE 25.95 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.899% (RMSE 27.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.875% (RMSE 28.54 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.023% (RMSE 27.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.593% (RMSE 21.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.191% (RMSE 23.28 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.129% (RMSE 25.3 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.29% (RMSE 21.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.29% (RMSE 29.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.354% (RMSE 28.35 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.955% (RMSE 21.45 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.093% (RMSE 27.7 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.859% (RMSE 29.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.375% (RMSE 29.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.917% (RMSE 28.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.832% (RMSE 20.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.845% (RMSE 25.61 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.665% (RMSE 14.78 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.977% (RMSE 26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.571% (RMSE 27.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.528% (RMSE 28.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.28% (RMSE 28.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.355% (RMSE 21.06 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.634% (RMSE 23.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.262% (RMSE 25.01 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.473% (RMSE 22.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.51% (RMSE 30.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.581% (RMSE 28.5 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.881% (RMSE 21.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.692% (RMSE 27.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.777% (RMSE 29.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.428% (RMSE 28.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.199% (RMSE 21.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.171% (RMSE 25.57 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.182% (RMSE 14.89 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.648% (RMSE 25.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.135% (RMSE 27.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.857% (RMSE 28.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.222% (RMSE 28.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.587% (RMSE 20.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.988% (RMSE 23.01 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.992% (RMSE 24.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.353% (RMSE 21.87 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.094% (RMSE 30.11 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.564% (RMSE 28.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.172% (RMSE 21.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.052% (RMSE 27.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.897% (RMSE 29.26 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.042% (RMSE 20.27 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.982% (RMSE 23.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.103% (RMSE 15.37 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.581% (RMSE 26.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.383% (RMSE 27.48 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.841% (RMSE 28.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.813% (RMSE 28.41 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.504% (RMSE 20.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.293% (RMSE 22.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.235% (RMSE 24.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.51% (RMSE 21.2 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.561% (RMSE 29.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.103% (RMSE 28.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.675% (RMSE 19.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.272% (RMSE 26.56 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.614% (RMSE 28.75 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.813% (RMSE 17.15 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.774% (RMSE 14.98 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.274% (RMSE 20.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.615% (RMSE 21.18 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.718% (RMSE 21.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.372% (RMSE 21.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.753% (RMSE 16.11 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.379% (RMSE 17.43 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.611% (RMSE 18.17 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.497% (RMSE 15.95 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.913% (RMSE 21.13 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.133% (RMSE 20.76 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.58% (RMSE 14.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.217% (RMSE 18.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.631% (RMSE 19.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.366% (RMSE 14.85 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.747% (RMSE 23.65 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.605% (RMSE 24.68 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.151% (RMSE 25.3 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.201% (RMSE 24.87 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.106% (RMSE 20.43 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.446% (RMSE 21.7 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.807% (RMSE 23.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.878% (RMSE 20.64 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.222% (RMSE 24.16 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.514% (RMSE 25.93 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.786% (RMSE 18.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.204% (RMSE 22.21 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.501% (RMSE 23.39 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.046% (RMSE 6.96 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.912% (RMSE 10.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.886% (RMSE 12.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.648% (RMSE 15.24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.918% (RMSE 13.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.086% (RMSE 13.69 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.565% (RMSE 14.32 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.905% (RMSE 13.74 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.882% (RMSE 16.72 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.557% (RMSE 11.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.541% (RMSE 12.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.525% (RMSE 15.5 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.639% (RMSE 11.31 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.382% (RMSE 23.11 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.561% (RMSE 25.24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.814% (RMSE 26.29 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.197% (RMSE 20.58 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.739% (RMSE 21.97 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.906% (RMSE 23.18 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.344% (RMSE 21.55 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.317% (RMSE 27.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.104% (RMSE 24.46 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.72% (RMSE 20.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.435% (RMSE 25.67 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.3% (RMSE 25.42 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.612% (RMSE 26.61 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.26% (RMSE 28.71 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.547% (RMSE 21.19 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.652% (RMSE 22.91 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.718% (RMSE 23.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.676% (RMSE 22.45 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.918% (RMSE 29.27 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.841% (RMSE 26.51 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.586% (RMSE 21.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.521% (RMSE 26.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.776% (RMSE 27.02 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -34.868% (RMSE 30.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.916% (RMSE 21.3 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.956% (RMSE 23.47 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.705% (RMSE 24.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.594% (RMSE 22.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -29.977% (RMSE 30.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.63% (RMSE 28.75 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.658% (RMSE 22.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.838% (RMSE 27.28 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.441% (RMSE 28.24 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.286% (RMSE 21.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.055% (RMSE 23.15 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.692% (RMSE 24.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.116% (RMSE 23.59 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.749% (RMSE 29.95 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.252% (RMSE 27.97 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.534% (RMSE 21.84 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.907% (RMSE 26.52 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.174% (RMSE 27.83 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.312% (RMSE 21.07 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.931% (RMSE 21.94 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.516% (RMSE 21.52 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.173% (RMSE 22.22 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.998% (RMSE 20.79 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.172% (RMSE 21.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.732% (RMSE 20.49 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.876% (RMSE 21.3 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.706% (RMSE 23.06 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.788% (RMSE 22.12 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.887% (RMSE 23.48 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.856% (RMSE 23.33 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.162% (RMSE 22.53 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.932% (RMSE 22.41 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.729% (RMSE 24.09 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.116% (RMSE 22.99 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.843% (RMSE 24.92 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.98% (RMSE 25.87 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.907% (RMSE 23.48 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.631% (RMSE 24.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.097% (RMSE 26.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.284% (RMSE 22.66 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.864% (RMSE 22.73 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.42% (RMSE 20.88 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.399% (RMSE 20.38 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.397% (RMSE 22.8 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -35.457% (RMSE 30.72 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.504% (RMSE 20.53 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.226% (RMSE 25.83 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.591% (RMSE 27.25 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.325% (RMSE 22.81 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.665% (RMSE 27.89 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.705% (RMSE 27.77 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.111% (RMSE 18.83 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.791% (RMSE 20.14 vs. 26.4 null)
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.111% (RMSE 25.71 vs. 26.4 null)

R-squared by pairs of metrics is explored:

# Convert the pairwise R-squared matrix into a labeled tibble: each row is one
# (var1, var2) predictor pair with its holdout R-squared
dfSmallR2RH <- mtxSmallRH %>% 
    as.data.frame() %>% 
    purrr::set_names(c("idx1", "idx2", "r2")) %>% 
    tibble::as_tibble() %>% 
    mutate(var1=useRH[idx1], var2=useRH[idx2], rn=row_number()) 
# Show the strongest pairs first
dfSmallR2RH %>% select(var1, var2, r2) %>% arrange(desc(r2)) %>% print(n=20)
## # A tibble: 703 × 3
##    var1                       var2                            r2
##    <chr>                      <chr>                        <dbl>
##  1 dewpoint_2m                vapor_pressure_deficit       0.999
##  2 temperature_2m             dewpoint_2m                  0.998
##  3 temperature_2m             vapor_pressure_deficit       0.998
##  4 apparent_temperature       vapor_pressure_deficit       0.991
##  5 dewpoint_2m                apparent_temperature         0.971
##  6 vapor_pressure_deficit     soil_temperature_0_to_7cm    0.930
##  7 dewpoint_2m                soil_temperature_0_to_7cm    0.889
##  8 vapor_pressure_deficit     soil_temperature_7_to_28cm   0.849
##  9 vapor_pressure_deficit     doy                          0.826
## 10 vapor_pressure_deficit     month                        0.816
## 11 surface_pressure           vapor_pressure_deficit       0.795
## 12 vapor_pressure_deficit     soil_temperature_28_to_100cm 0.769
## 13 vapor_pressure_deficit     src                          0.765
## 14 vapor_pressure_deficit     soil_moisture_7_to_28cm      0.731
## 15 vapor_pressure_deficit     soil_moisture_0_to_7cm       0.729
## 16 vapor_pressure_deficit     soil_moisture_100_to_255cm   0.729
## 17 cloudcover_low             vapor_pressure_deficit       0.716
## 18 pressure_msl               vapor_pressure_deficit       0.711
## 19 dewpoint_2m                soil_temperature_7_to_28cm   0.706
## 20 et0_fao_evapotranspiration src                          0.706
## # ℹ 683 more rows
# For each predictor, summarize the min/mean/max R-squared across all pairs it
# appears in, then plot as a dot (mean) with an error bar (min-max range)
dfSmallR2RH %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(r2_min=min(r2), r2_mu=mean(r2), r2_max=max(r2)) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    coord_flip() + 
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    lims(y=c(NA, 1)) + 
    geom_hline(yintercept=1, lty=2, color="red") +
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting relative humidity", 
         y="Range of R2 (min-mean-max)", 
         x=NULL
    )

# Strongest pairs once the near-tautological predictors (dewpoint, vapor
# pressure deficit) are excluded from both positions of the pair
dfSmallR2RH %>% 
    filter(!str_detect(var1, "vapor|dewpo")) %>% 
    filter(!str_detect(var2, "vapor|dewpo")) %>% 
    arrange(desc(r2)) %>% 
    select(var1, var2, r2) %>% 
    print(n=20)
## # A tibble: 630 × 3
##    var1                       var2                          r2
##    <chr>                      <chr>                      <dbl>
##  1 et0_fao_evapotranspiration src                        0.706
##  2 temperature_2m             apparent_temperature       0.678
##  3 surface_pressure           et0_fao_evapotranspiration 0.668
##  4 et0_fao_evapotranspiration soil_moisture_100_to_255cm 0.635
##  5 cloudcover_low             et0_fao_evapotranspiration 0.630
##  6 et0_fao_evapotranspiration soil_moisture_0_to_7cm     0.628
##  7 et0_fao_evapotranspiration weathercode                0.578
##  8 diffuse_radiation          et0_fao_evapotranspiration 0.574
##  9 et0_fao_evapotranspiration soil_moisture_7_to_28cm    0.564
## 10 hour                       src                        0.560
## 11 direct_normal_irradiance   src                        0.555
## 12 cloudcover                 et0_fao_evapotranspiration 0.552
## 13 cloudcover_low             src                        0.550
## 14 shortwave_radiation        et0_fao_evapotranspiration 0.543
## 15 direct_radiation           src                        0.541
## 16 surface_pressure           direct_normal_irradiance   0.529
## 17 weathercode                src                        0.528
## 18 direct_normal_irradiance   soil_moisture_0_to_7cm     0.526
## 19 et0_fao_evapotranspiration soil_moisture_28_to_100cm  0.526
## 20 shortwave_radiation        src                        0.526
## # ℹ 610 more rows
# Same min/mean/max range plot as above, but with dewpoint and vapor pressure
# deficit pairs removed before summarizing
dfSmallR2RH %>% 
    filter(!str_detect(var1, "vapor|dewpo")) %>% 
    filter(!str_detect(var2, "vapor|dewpo")) %>% 
    pivot_longer(cols=c(var1, var2)) %>% 
    group_by(value) %>% 
    summarize(r2_min=min(r2), r2_mu=mean(r2), r2_max=max(r2)) %>% 
    ggplot(aes(x=fct_reorder(value, r2_mu))) + 
    coord_flip() + 
    geom_point(aes(y=r2_mu)) + 
    geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) + 
    lims(y=c(NA, 1)) + 
    geom_hline(yintercept=1, lty=2, color="red") +
    labs(title="R-squared in every 2-predictor model including self and one other", 
         subtitle="Predicting relative humidity (excluding dewpoint and VPD)", 
         y="Range of R2 (min-mean-max)", 
         x=NULL
    )

# Null accuracy would pick the most frequent observation
# allCity %>% count(weathercode, sort=TRUE) %>% mutate(pct=n/sum(n))

Combinations of vapor pressure deficit, dewpoint, and temperature tend to strongly predict relative humidity. Absent those, evapotranspiration and location (city) are generally the next best predictors.

Select combinations are explored using the full training dataset:

# Candidate predictors selected from the small-model exploration for the
# full-training-set runs
possLargeRH <- c(
    "temperature_2m", 
    "dewpoint_2m", 
    "vapor_pressure_deficit", 
    "apparent_temperature", 
    "et0_fao_evapotranspiration", 
    "src"
)
possLargeRH
## [1] "temperature_2m"             "dewpoint_2m"               
## [3] "vapor_pressure_deficit"     "apparent_temperature"      
## [5] "et0_fao_evapotranspiration" "src"
mtxLargeRH <- matrix(nrow=0, ncol=3)

# Fit a 2-predictor random forest on the full training set for every pair of
# candidate predictors, recording the 2022-holdout R-squared per pair.
# Fixes vs. the naive loop:
#  - the weathercode factor conversion of the train/test frames is
#    loop-invariant, so it is hoisted out instead of being recomputed on
#    every iteration (also drops the no-op dfTrainCloud[,] subsetting);
#  - results are collected in a preallocated list and appended to
#    mtxLargeRH once at the end, avoiding quadratic rbind() growth;
#  - seq_len() replaces 1:(n-1), which would misbehave if fewer than two
#    candidates were supplied.
dfTrainLargeRH <- dfTrainCloud %>% mutate(weathercode=factor(weathercode))
dfTestLargeRH <- dfTestCloud %>% mutate(weathercode=factor(weathercode))
nPossRH <- length(possLargeRH)
lstLargeRH <- vector("list", nPossRH * (nPossRH - 1) / 2)
iCombRH <- 0L
for(idx1 in seq_len(nPossRH - 1)) {
    for(idx2 in (idx1+1):nPossRH) {
        r2LargeRH <- runFullRF(dfTrain=dfTrainLargeRH, 
                               yVar="relativehumidity_2m", 
                               xVars=possLargeRH[c(idx1, idx2)], 
                               dfTest=dfTestLargeRH,
                               useLabel=keyLabel, 
                               useSub=stringr::str_to_sentence(keyLabel), 
                               isContVar=TRUE,
                               mtry=2,
                               makePlots=FALSE,
                               returnData=TRUE
                               )[["rfAcc"]][["r2"]]
        iCombRH <- iCombRH + 1L
        lstLargeRH[[iCombRH]] <- c(idx1, idx2, r2LargeRH)
    }
}
# Single append preserves the original (idx1, idx2) row ordering
mtxLargeRH <- rbind(mtxLargeRH, do.call(rbind, lstLargeRH))
## Growing trees.. Progress: 37%. Estimated remaining time: 52 seconds.
## Growing trees.. Progress: 70%. Estimated remaining time: 26 seconds.
## Growing trees.. Progress: 98%. Estimated remaining time: 1 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.984% (RMSE 0.33 vs. 26.4 null)
## Growing trees.. Progress: 48%. Estimated remaining time: 33 seconds.
## Growing trees.. Progress: 92%. Estimated remaining time: 5 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.973% (RMSE 0.44 vs. 26.4 null)
## Growing trees.. Progress: 35%. Estimated remaining time: 57 seconds.
## Growing trees.. Progress: 71%. Estimated remaining time: 25 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.766% (RMSE 14.03 vs. 26.4 null)
## Growing trees.. Progress: 45%. Estimated remaining time: 37 seconds.
## Growing trees.. Progress: 90%. Estimated remaining time: 6 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.777% (RMSE 19.08 vs. 26.4 null)
## Growing trees.. Progress: 74%. Estimated remaining time: 10 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.701% (RMSE 17.96 vs. 26.4 null)
## Growing trees.. Progress: 52%. Estimated remaining time: 28 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.985% (RMSE 0.32 vs. 26.4 null)
## Growing trees.. Progress: 40%. Estimated remaining time: 46 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.335% (RMSE 4.31 vs. 26.4 null)
## Growing trees.. Progress: 57%. Estimated remaining time: 23 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.139% (RMSE 13.68 vs. 26.4 null)
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.472% (RMSE 16.8 vs. 26.4 null)
## Growing trees.. Progress: 43%. Estimated remaining time: 40 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 6 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.14% (RMSE 2.45 vs. 26.4 null)
## Growing trees.. Progress: 45%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 5 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.764% (RMSE 14.03 vs. 26.4 null)
## Growing trees.. Progress: 90%. Estimated remaining time: 3 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.906% (RMSE 11.83 vs. 26.4 null)
## Growing trees.. Progress: 36%. Estimated remaining time: 55 seconds.
## Growing trees.. Progress: 71%. Estimated remaining time: 25 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.867% (RMSE 19.42 vs. 26.4 null)
## Growing trees.. Progress: 69%. Estimated remaining time: 14 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.349% (RMSE 18.97 vs. 26.4 null)
## Growing trees.. Progress: 96%. Estimated remaining time: 1 seconds.
## 
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.428% (RMSE 13.86 vs. 26.4 null)
# Label the full-training-set R-squared matrix with predictor names, one row
# per (var1, var2) pair
dfLargeR2RH <- mtxLargeRH %>% 
    as.data.frame() %>% 
    purrr::set_names(c("idx1", "idx2", "r2")) %>% 
    tibble::as_tibble() %>% 
    mutate(var1=possLargeRH[idx1], var2=possLargeRH[idx2], rn=row_number()) 
# Show the strongest pairs first
dfLargeR2RH %>% select(var1, var2, r2) %>% arrange(desc(r2)) %>% print(n=20)
## # A tibble: 15 × 3
##    var1                       var2                          r2
##    <chr>                      <chr>                      <dbl>
##  1 dewpoint_2m                vapor_pressure_deficit     1.00 
##  2 temperature_2m             dewpoint_2m                1.00 
##  3 temperature_2m             vapor_pressure_deficit     1.00 
##  4 vapor_pressure_deficit     apparent_temperature       0.991
##  5 dewpoint_2m                apparent_temperature       0.973
##  6 vapor_pressure_deficit     src                        0.799
##  7 dewpoint_2m                et0_fao_evapotranspiration 0.731
##  8 et0_fao_evapotranspiration src                        0.724
##  9 temperature_2m             apparent_temperature       0.718
## 10 vapor_pressure_deficit     et0_fao_evapotranspiration 0.718
## 11 dewpoint_2m                src                        0.595
## 12 temperature_2m             src                        0.537
## 13 apparent_temperature       src                        0.483
## 14 temperature_2m             et0_fao_evapotranspiration 0.478
## 15 apparent_temperature       et0_fao_evapotranspiration 0.459

Data for Miami are downloaded and cached to avoid multiple hits to the server:

# Build the Open-Meteo archive URL requesting every hourly metric for
# Miami, FL over 2010-2023 in US/Eastern time.
# seq_len() replaces 1:nrow(), which yields c(1, 0) if the metrics table
# were ever empty.
testURLHourly <- helperOpenMeteoURL(cityName="Miami FL", 
                                    hourlyIndices=seq_len(nrow(tblMetricsHourly)),
                                    startDate="2010-01-01", 
                                    endDate="2023-12-31", 
                                    tz="US/Eastern"
                                    )
## 
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
testURLHourly
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=25.78&longitude=-80.21&start_date=2010-01-01&end_date=2023-12-31&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm&timezone=US%2FEastern"
# Use the cached copy when present; otherwise fetch from the API
if(file.exists("testOM_hourly_mia.json")) {
    cat("\nFile testOM_hourly_mia.json already exists, skipping download\n")
} else {
    fileDownload(fileName="testOM_hourly_mia.json", url=testURLHourly)
}
## 
## File testOM_hourly_mia.json already exists, skipping download
# Build the Open-Meteo archive URL requesting every daily metric for
# Miami, FL over 2010-2023 in US/Eastern time.
# seq_len() replaces 1:nrow(), which yields c(1, 0) if the metrics table
# were ever empty.
testURLDaily <- helperOpenMeteoURL(cityName="Miami FL", 
                                   dailyIndices=seq_len(nrow(tblMetricsDaily)),
                                   startDate="2010-01-01", 
                                   endDate="2023-12-31", 
                                   tz="US/Eastern"
                                   )
## 
## Daily metrics created from indices: weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration
testURLDaily
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=25.78&longitude=-80.21&start_date=2010-01-01&end_date=2023-12-31&daily=weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration&timezone=US%2FEastern"
# Download file (skipped when a cached copy is already on disk)
if(file.exists("testOM_daily_mia.json")) {
    cat("\nFile testOM_daily_mia.json already exists, skipping download\n")
} else {
    fileDownload(fileName="testOM_daily_mia.json", url=testURLDaily)
}
## 
## File testOM_daily_mia.json already exists, skipping download

The daily and hourly datasets are loaded:

# Read daily JSON file
# formatOpenMeteoJSON() (defined in _v001) parses the Open-Meteo archive JSON
# into a list of tibbles (tblDaily / tblHourly / tblUnits / tblDescription);
# for this daily file, tblHourly comes back NULL (see printed output below)
miaOMDaily <- formatOpenMeteoJSON("testOM_daily_mia.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily 
## 
## $tblDaily
## # A tibble: 5,113 × 18
##    date       time       weathercode temperature_2m_max temperature_2m_min
##    <date>     <chr>            <int>              <dbl>              <dbl>
##  1 2010-01-01 2010-01-01          53               26.6               17.5
##  2 2010-01-02 2010-01-02           1               18                 11.6
##  3 2010-01-03 2010-01-03          51               16.7               11.3
##  4 2010-01-04 2010-01-04           3               15.5                9  
##  5 2010-01-05 2010-01-05           3               14.9                9.4
##  6 2010-01-06 2010-01-06           1               13.8                6.3
##  7 2010-01-07 2010-01-07           1               16.6                8.6
##  8 2010-01-08 2010-01-08           2               22.3               11.6
##  9 2010-01-09 2010-01-09          61               18.3                6.8
## 10 2010-01-10 2010-01-10           3                9.3                3.3
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## #   apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## #   snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## #   windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## #   winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## #   et0_fao_evapotranspiration <dbl>
## 
## $tblHourly
## NULL
## 
## $tblUnits
## # A tibble: 17 × 4
##    metricType  name                       value      description                
##    <chr>       <chr>                      <chr>      <chr>                      
##  1 daily_units time                       "iso8601"  <NA>                       
##  2 daily_units weathercode                "wmo code" The most severe weather co…
##  3 daily_units temperature_2m_max         "deg C"    Maximum and minimum daily …
##  4 daily_units temperature_2m_min         "deg C"    Maximum and minimum daily …
##  5 daily_units apparent_temperature_max   "deg C"    Maximum and minimum daily …
##  6 daily_units apparent_temperature_min   "deg C"    Maximum and minimum daily …
##  7 daily_units precipitation_sum          "mm"       Sum of daily precipitation…
##  8 daily_units rain_sum                   "mm"       Sum of daily rain          
##  9 daily_units snowfall_sum               "cm"       Sum of daily snowfall      
## 10 daily_units precipitation_hours        "h"        The number of hours with r…
## 11 daily_units sunrise                    "iso8601"  Sun rise and set times     
## 12 daily_units sunset                     "iso8601"  Sun rise and set times     
## 13 daily_units windspeed_10m_max          "km/h"     Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max          "km/h"     Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg "     Dominant wind direction    
## 16 daily_units shortwave_radiation_sum    "MJ/m²"    The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm"       Daily sum of ET0 Reference…
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone  
##      <dbl>     <dbl>             <dbl>              <int> <chr>     
## 1     25.8     -80.2              122.             -14400 US/Eastern
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 25.7645
## longitude: -80.19607
## generationtime_ms: 121.9139
## utc_offset_seconds: -14400
## timezone: US/Eastern
## timezone_abbreviation: EDT
## elevation: 4
# Read hourly JSON file
# addVars=TRUE appends derived columns to tblHourly: calendar fields (year,
# month, doy), time-of-day/season factors, and pct_* percentile columns,
# as shown in the glimpse() output below
miaTemp <- formatOpenMeteoJSON("testOM_hourly_mia.json", addVars=TRUE)
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly 
## 
## $tblDaily
## NULL
## 
## $tblHourly
## # A tibble: 122,712 × 37
##    time                date        hour temperature_2m relativehumidity_2m
##    <dttm>              <date>     <int>          <dbl>               <int>
##  1 2010-01-01 00:00:00 2010-01-01     0           23                    74
##  2 2010-01-01 01:00:00 2010-01-01     1           22.9                  73
##  3 2010-01-01 02:00:00 2010-01-01     2           22.8                  72
##  4 2010-01-01 03:00:00 2010-01-01     3           21.9                  84
##  5 2010-01-01 04:00:00 2010-01-01     4           21.7                  83
##  6 2010-01-01 05:00:00 2010-01-01     5           21.7                  82
##  7 2010-01-01 06:00:00 2010-01-01     6           21.6                  82
##  8 2010-01-01 07:00:00 2010-01-01     7           21.7                  82
##  9 2010-01-01 08:00:00 2010-01-01     8           21.7                  82
## 10 2010-01-01 09:00:00 2010-01-01     9           22                    81
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
## 
## $tblUnits
## # A tibble: 34 × 4
##    metricType   name                 value   description                        
##    <chr>        <chr>                <chr>   <chr>                              
##  1 hourly_units time                 iso8601 <NA>                               
##  2 hourly_units temperature_2m       deg C   Air temperature at 2 meters above …
##  3 hourly_units relativehumidity_2m  %       Relative humidity at 2 meters abov…
##  4 hourly_units dewpoint_2m          deg C   Dew point temperature at 2 meters …
##  5 hourly_units apparent_temperature deg C   Apparent temperature is the percei…
##  6 hourly_units pressure_msl         hPa     Atmospheric air pressure reduced t…
##  7 hourly_units surface_pressure     hPa     Atmospheric air pressure reduced t…
##  8 hourly_units precipitation        mm      Total precipitation (rain, showers…
##  9 hourly_units rain                 mm      Only liquid precipitation of the p…
## 10 hourly_units snowfall             cm      Snowfall amount of the preceding h…
## # ℹ 24 more rows
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seconds timezone  
##      <dbl>     <dbl>             <dbl>              <int> <chr>     
## 1     25.8     -80.2              180.             -14400 US/Eastern
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
## 
## 
## latitude: 25.7645
## longitude: -80.19607
## generationtime_ms: 179.885
## utc_offset_seconds: -14400
## timezone: US/Eastern
## timezone_abbreviation: EDT
## elevation: 4
## 
## Rows: 122,712
## Columns: 80
## $ time                              <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date                              <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour                              <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m                    <dbl> 23.0, 22.9, 22.8, 21.9, 21.7, 21.7, …
## $ relativehumidity_2m               <int> 74, 73, 72, 84, 83, 82, 82, 82, 82, …
## $ dewpoint_2m                       <dbl> 18.1, 17.8, 17.5, 19.1, 18.8, 18.5, …
## $ apparent_temperature              <dbl> 24.9, 24.8, 24.6, 24.2, 23.5, 23.2, …
## $ pressure_msl                      <dbl> 1019.7, 1019.5, 1018.8, 1018.3, 1017…
## $ surface_pressure                  <dbl> 1019.2, 1019.0, 1018.3, 1017.8, 1017…
## $ precipitation                     <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ rain                              <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ snowfall                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover                        <int> 43, 50, 42, 40, 40, 45, 33, 51, 43, …
## $ cloudcover_low                    <int> 12, 14, 9, 3, 2, 5, 9, 29, 25, 12, 1…
## $ cloudcover_mid                    <int> 6, 14, 10, 16, 15, 17, 21, 25, 3, 11…
## $ cloudcover_high                   <int> 96, 96, 92, 93, 98, 100, 42, 33, 63,…
## $ shortwave_radiation               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 150, …
## $ direct_radiation                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 61, 12…
## $ direct_normal_irradiance          <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 89, 1…
## $ windspeed_10m                     <dbl> 7.1, 6.4, 5.7, 7.2, 9.8, 11.5, 10.7,…
## $ windspeed_100m                    <dbl> 9.6, 8.5, 8.7, 10.8, 13.0, 14.4, 15.…
## $ winddirection_10m                 <int> 210, 232, 198, 180, 172, 182, 213, 2…
## $ winddirection_100m                <int> 193, 208, 187, 176, 174, 180, 207, 2…
## $ windgusts_10m                     <dbl> 18.4, 18.7, 18.4, 11.2, 15.1, 17.6, …
## $ et0_fao_evapotranspiration        <dbl> 0.02, 0.02, 0.01, 0.00, 0.01, 0.02, …
## $ weathercode                       <int> 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, …
## $ vapor_pressure_deficit            <dbl> 0.72, 0.76, 0.77, 0.41, 0.43, 0.47, …
## $ soil_temperature_0_to_7cm         <dbl> 22.3, 22.1, 21.9, 21.5, 20.9, 20.8, …
## $ soil_temperature_7_to_28cm        <dbl> 23.2, 23.2, 23.1, 23.1, 23.0, 22.9, …
## $ soil_temperature_28_to_100cm      <dbl> 23.2, 23.2, 23.2, 23.2, 23.2, 23.2, …
## $ soil_temperature_100_to_255cm     <dbl> 26.3, 26.3, 26.3, 26.3, 26.3, 26.3, …
## $ soil_moisture_0_to_7cm            <dbl> 0.052, 0.052, 0.052, 0.053, 0.053, 0…
## $ soil_moisture_7_to_28cm           <dbl> 0.135, 0.135, 0.135, 0.135, 0.135, 0…
## $ soil_moisture_28_to_100cm         <dbl> 0.151, 0.151, 0.151, 0.151, 0.151, 0…
## $ soil_moisture_100_to_255cm        <dbl> 0.142, 0.142, 0.142, 0.142, 0.142, 0…
## $ origTime                          <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year                              <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month                             <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour                          <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod                               <fct> Night, Night, Night, Night, Night, N…
## $ doy                               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season                            <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason                         <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour                          <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m                <dbl> 25, 24, 23, 17, 16, 16, 15, 16, 16, …
## $ pct_relativehumidity_2m           <dbl> 41, 38, 36, 73, 69, 66, 66, 66, 66, …
## $ pct_dewpoint_2m                   <dbl> 27, 25, 24, 33, 31, 29, 29, 29, 29, …
## $ pct_apparent_temperature          <dbl> 31, 30, 29, 27, 23, 22, 22, 22, 22, …
## $ pct_pressure_msl                  <dbl> 79, 77, 70, 65, 58, 52, 40, 43, 39, …
## $ pct_surface_pressure              <dbl> 79, 77, 70, 65, 58, 52, 40, 43, 39, …
## $ pct_precipitation                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover                    <dbl> 64, 72, 63, 60, 60, 67, 50, 73, 64, …
## $ pct_cloudcover_low                <dbl> 53, 57, 44, 26, 22, 32, 44, 79, 75, …
## $ pct_cloudcover_mid                <dbl> 43, 60, 52, 63, 61, 65, 70, 75, 35, …
## $ pct_cloudcover_high               <dbl> 83, 83, 79, 80, 86, 91, 60, 57, 66, …
## $ pct_shortwave_radiation           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 61, 6…
## $ pct_direct_radiation              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 61, 6…
## $ pct_direct_normal_irradiance      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 63, 6…
## $ pct_diffuse_radiation             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 65, 8…
## $ pct_windspeed_10m                 <dbl> 18, 15, 12, 19, 33, 43, 38, 35, 37, …
## $ pct_windspeed_100m                <dbl> 17, 13, 14, 21, 30, 36, 41, 41, 41, …
## $ pct_winddirection_10m             <dbl> 76, 79, 73, 68, 66, 69, 76, 78, 79, …
## $ pct_winddirection_100m            <dbl> 74, 77, 72, 69, 68, 70, 77, 79, 79, …
## $ pct_windgusts_10m                 <dbl> 26, 27, 26, 7, 16, 24, 28, 30, 28, 3…
## $ pct_et0_fao_evapotranspiration    <dbl> 19, 19, 12, 0, 12, 19, 19, 12, 19, 3…
## $ pct_weathercode                   <dbl> 30, 30, 30, 30, 30, 30, 30, 62, 30, …
## $ pct_vapor_pressure_deficit        <dbl> 48, 52, 53, 18, 20, 24, 23, 23, 24, …
## $ pct_soil_temperature_0_to_7cm     <dbl> 18, 17, 16, 14, 11, 11, 10, 10, 11, …
## $ pct_soil_temperature_7_to_28cm    <dbl> 17, 17, 16, 16, 15, 15, 14, 14, 14, …
## $ pct_soil_temperature_28_to_100cm  <dbl> 12, 12, 12, 12, 12, 12, 12, 12, 12, …
## $ pct_soil_temperature_100_to_255cm <dbl> 52, 52, 52, 52, 52, 52, 52, 52, 52, …
## $ pct_soil_moisture_0_to_7cm        <dbl> 18, 18, 18, 19, 19, 20, 20, 20, 20, …
## $ pct_soil_moisture_7_to_28cm       <dbl> 43, 43, 43, 43, 43, 43, 43, 43, 43, …
## $ pct_soil_moisture_28_to_100cm     <dbl> 51, 51, 51, 51, 51, 51, 51, 51, 51, …
## $ pct_soil_moisture_100_to_255cm    <dbl> 38, 38, 38, 38, 38, 38, 38, 38, 38, …
## $ pct_year                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

## # A tibble: 8 × 4
##   todSeason    season tod       n
##   <fct>        <fct>  <fct> <int>
## 1 Spring-Day   Spring Day   15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day   Summer Day   15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day     Fall   Day   15288
## 6 Fall-Night   Fall   Night 15288
## 7 Winter-Day   Winter Day   15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
##     hour fct_hour tod       n
##    <int> <fct>    <fct> <int>
##  1     0 0        Night  5113
##  2     1 1        Night  5113
##  3     2 2        Night  5113
##  4     3 3        Night  5113
##  5     4 4        Night  5113
##  6     5 5        Night  5113
##  7     6 6        Night  5113
##  8     7 7        Day    5113
##  9     8 8        Day    5113
## 10     9 9        Day    5113
## 11    10 10       Day    5113
## 12    11 11       Day    5113
## 13    12 12       Day    5113
## 14    13 13       Day    5113
## 15    14 14       Day    5113
## 16    15 15       Day    5113
## 17    16 16       Day    5113
## 18    17 17       Day    5113
## 19    18 18       Day    5113
## 20    19 19       Night  5113
## 21    20 20       Night  5113
## 22    21 21       Night  5113
## 23    22 22       Night  5113
## 24    23 23       Night  5113
## # A tibble: 12 × 3
##    month season     n
##    <fct> <fct>  <int>
##  1 Jan   Winter 10416
##  2 Feb   Winter  9480
##  3 Mar   Spring 10416
##  4 Apr   Spring 10080
##  5 May   Spring 10416
##  6 Jun   Summer 10080
##  7 Jul   Summer 10416
##  8 Aug   Summer 10416
##  9 Sep   Fall   10080
## 10 Oct   Fall   10416
## 11 Nov   Fall   10080
## 12 Dec   Winter 10416

An integrated set of all-city test and train data is updated:

# Bind all the data frames; .id="src" tags each row with its city label
allCity <- list("NYC"=nycTemp, 
                "LA"=laxTemp, 
                "Chicago"=chiTemp, 
                "Houston"=houTemp, 
                "Vegas"=lasTemp, 
                "Miami"=miaTemp
                ) %>%
    bind_rows(.id="src")

# Create the index for training data (random 70/30 train/test split).
# seq_len() replaces 1:nrow(): equivalent here, but safe if nrow() were 0.
set.seed(24101115)
idxTrain_v2 <- sample(seq_len(nrow(allCity)), size=round(0.7*nrow(allCity)), replace=FALSE)

# Add test-train flag to full dataset, plus a factor copy of src for modeling
allCity <- allCity %>%
    mutate(tt=ifelse(row_number() %in% idxTrain_v2, "train", "test"), 
           fct_src=factor(src))
allCity
## # A tibble: 731,496 × 83
##    src   time                date        hour temperature_2m relativehumidity_2m
##    <chr> <dttm>              <date>     <int>          <dbl>               <int>
##  1 NYC   2010-01-01 00:00:00 2010-01-01     0           -1.1                  95
##  2 NYC   2010-01-01 01:00:00 2010-01-01     1           -1                    96
##  3 NYC   2010-01-01 02:00:00 2010-01-01     2           -1                    96
##  4 NYC   2010-01-01 03:00:00 2010-01-01     3           -0.8                  97
##  5 NYC   2010-01-01 04:00:00 2010-01-01     4           -0.9                  97
##  6 NYC   2010-01-01 05:00:00 2010-01-01     5           -0.8                  97
##  7 NYC   2010-01-01 06:00:00 2010-01-01     6           -0.7                  97
##  8 NYC   2010-01-01 07:00:00 2010-01-01     7           -0.5                  97
##  9 NYC   2010-01-01 08:00:00 2010-01-01     8           -0.6                  97
## 10 NYC   2010-01-01 09:00:00 2010-01-01     9           -0.6                  97
## # ℹ 731,486 more rows
## # ℹ 77 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## #   pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## #   rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## #   cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## #   direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## #   diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
# Review counts by year: one row per city/split, one column per year
allCity %>% 
    count(src, tt, year) %>% 
    pivot_wider(id_cols=c(src, tt), names_from=year, values_from=n)
## # A tibble: 12 × 16
##    src     tt    `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018`
##    <chr>   <chr>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
##  1 Chicago test    2687   2616   2596   2605   2674   2626   2647   2638   2660
##  2 Chicago train   6073   6144   6188   6155   6086   6134   6137   6122   6100
##  3 Houston test    2608   2595   2611   2573   2702   2609   2602   2613   2597
##  4 Houston train   6152   6165   6173   6187   6058   6151   6182   6147   6163
##  5 LA      test    2661   2519   2675   2671   2632   2655   2640   2589   2637
##  6 LA      train   6099   6241   6109   6089   6128   6105   6144   6171   6123
##  7 Miami   test    2622   2594   2637   2627   2554   2656   2630   2575   2594
##  8 Miami   train   6138   6166   6147   6133   6206   6104   6154   6185   6166
##  9 NYC     test    2629   2596   2639   2633   2653   2619   2669   2650   2624
## 10 NYC     train   6131   6164   6145   6127   6107   6141   6115   6110   6136
## 11 Vegas   test    2659   2549   2614   2695   2619   2655   2610   2601   2585
## 12 Vegas   train   6101   6211   6170   6065   6141   6105   6174   6159   6175
## # ℹ 5 more variables: `2019` <int>, `2020` <int>, `2021` <int>, `2022` <int>,
## #   `2023` <int>

Distributions of several key variables are explored:

# Metrics that anchor the exploratory comparisons across cities
keyVars <- c("temperature_2m", 
             "relativehumidity_2m", 
             "dewpoint_2m", 
             "shortwave_radiation", 
             "vapor_pressure_deficit", 
             "soil_temperature_28_to_100cm", 
             "soil_temperature_100_to_255cm", 
             "soil_moisture_28_to_100cm", 
             "soil_moisture_100_to_255cm"
             )

# Long format, then one boxplot facet per metric with one box per city
allCity %>%
    colSelector(vecSelect=c("src", keyVars)) %>%
    pivot_longer(cols=-src) %>%
    ggplot(aes(x=src, y=value)) + 
    geom_boxplot(aes(fill=src)) + 
    facet_wrap(~name, scales="free_y") + 
    scale_fill_discrete(NULL) + 
    labs(x=NULL, y=NULL, title="Distribution of Key Metrics by City")

At a glance, Miami seems similar to Houston on several metrics. The scatter of temperature and dewpoint is also explored:

# Round both metrics to whole degrees, tally each (city, temp, dewpoint)
# combination, then plot counts as point size with a per-city weighted lm fit
allCity %>% 
    select(t=temperature_2m, d=dewpoint_2m, src) %>% 
    mutate(across(.cols=where(is.numeric), .fns=round)) %>% 
    count(src, t, d) %>% 
    ggplot(aes(x=t, y=d)) + 
    geom_point(aes(size=n, color=src), alpha=0.1) + 
    geom_smooth(aes(color=src, weight=n), method="lm") +
    scale_color_discrete(NULL) + 
    scale_size_continuous("# Obs") + 
    labs(x="Temperature (C)", y="Dewpoint (C)", title="Temperature vs. Dewpoint", subtitle="Hourly")
## `geom_smooth()` using formula = 'y ~ x'

# Pearson correlation of hourly temperature vs. dewpoint, computed per city
allCity %>% 
    group_by(src) %>%
    summarize(cor_td=cor(temperature_2m, dewpoint_2m))
## # A tibble: 6 × 2
##   src     cor_td
##   <chr>    <dbl>
## 1 Chicago  0.950
## 2 Houston  0.834
## 3 LA       0.273
## 4 Miami    0.792
## 5 NYC      0.919
## 6 Vegas    0.371

Miami exhibits a strong correlation between temperature and dewpoint (0.792), most similar in magnitude to Houston (0.834).

A basic rpart model is run excluding Miami, with predictions made on holdout data including Miami:

# Classification tree on training rows only, with Miami withheld entirely.
# Predictors are the shared varsTrain vector (defined earlier in the file);
# the piped data frame is passed to rpart via data=. so the formula sees
# fct_src plus all selected predictors.
tstRP2 <- allCity %>% 
    filter(tt=="train", src!="Miami") %>% 
    select(fct_src, all_of(varsTrain)) %>% 
    rpart::rpart(fct_src ~ ., data=., method="class")

# Variable importances, reshaped from the named numeric vector into a
# two-column data frame (predictor name, importance score)
data.frame(predictor=names(tstRP2$variable.importance), 
           varImp=unname(tstRP2$variable.importance)
           )
##                        predictor      varImp
## 1     soil_moisture_100_to_255cm 271772.8906
## 2               surface_pressure 229044.1367
## 3      soil_moisture_28_to_100cm 156413.4398
## 4        soil_moisture_7_to_28cm 154321.9803
## 5         soil_moisture_0_to_7cm 148439.1314
## 6  soil_temperature_100_to_255cm  63015.8130
## 7            relativehumidity_2m  37580.7621
## 8   soil_temperature_28_to_100cm  30614.2617
## 9     soil_temperature_7_to_28cm  21126.8896
## 10                windspeed_100m  17917.6043
## 11     soil_temperature_0_to_7cm  15009.6385
## 12                           doy  14887.9829
## 13          apparent_temperature  13103.0382
## 14                temperature_2m  12479.4021
## 15                          year   5524.8864
## 16                   dewpoint_2m    955.1846
# Predictions
# Confusion heatmap of predicted class vs. actual city, faceted by test/train.
# Miami rows are scored too, even though the tree never saw Miami in training.
allCity %>% 
    mutate(pred=predict(tstRP2, newdata=., type="class")) %>% 
    count(tt, src, pred) %>% 
    ggplot(aes(x=src, y=pred)) + 
    geom_tile(aes(fill=n)) + 
    scale_fill_continuous(low="white", high="green") + 
    facet_wrap(~tt) + 
    geom_text(aes(label=n), size=2.5)

# Accuracy on holdout: per-row correctness flag, averaged by split and city,
# then spread to one column per split
allCity %>% 
    mutate(pred=predict(tstRP2, newdata=., type="class"), 
           hit=(src==pred)) %>% 
    group_by(tt, src) %>%
    summarize(acc=mean(hit), .groups="drop") %>%
    pivot_wider(id_cols=src, names_from=tt, values_from=acc)
## # A tibble: 6 × 3
##   src      test train
##   <chr>   <dbl> <dbl>
## 1 Chicago 0.995 0.996
## 2 Houston 0.983 0.983
## 3 LA      1.00  1.00 
## 4 Miami   0     0    
## 5 NYC     0.989 0.989
## 6 Vegas   1.00  1.00

A basic tree model works well to split the data by city, and almost always predicts Miami as LA when trained without Miami data

A basic GLM is used to predict whether data are from NYC:

# Binary logistic regression: is the observation from NYC?
# Trained on the train split with Miami withheld. The second select() removes
# diffuse_radiation before fitting so the design matrix is not rank-deficient.
tstGLM <- allCity %>% 
    filter(tt=="train", src!="Miami") %>% 
    mutate(isNYC=ifelse(src=="NYC", 1, 0)) %>%
    select(isNYC, all_of(varsTrain)) %>% 
    select(-diffuse_radiation) %>% # perfectly correlated with other radiation variables
    glm(isNYC ~ ., data=., family="binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(tstGLM)
## 
## Call:
## glm(formula = isNYC ~ ., family = "binomial", data = .)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3549  -0.0029   0.0000   0.0000   3.5871  
## 
## Coefficients:
##                                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                   -1.309e+02  8.898e+00  -14.709  < 2e-16 ***
## hour                           4.116e-02  2.769e-03   14.866  < 2e-16 ***
## temperature_2m                -2.705e-01  3.701e-02   -7.309 2.69e-13 ***
## relativehumidity_2m           -7.135e-02  6.491e-03  -10.992  < 2e-16 ***
## dewpoint_2m                    3.692e-01  3.079e-02   11.990  < 2e-16 ***
## apparent_temperature          -1.035e-02  2.213e-02   -0.468 0.639917    
## pressure_msl                  -1.941e+00  1.562e-02 -124.234  < 2e-16 ***
## surface_pressure               1.875e+00  1.486e-02  126.146  < 2e-16 ***
## precipitation                 -1.502e+01  1.196e+01   -1.256 0.209140    
## rain                           1.520e+01  1.196e+01    1.271 0.203792    
## snowfall                       2.211e+01  1.708e+01    1.295 0.195435    
## cloudcover                    -2.399e-04  1.533e-03   -0.157 0.875614    
## cloudcover_low                 3.305e-03  1.146e-03    2.884 0.003926 ** 
## cloudcover_mid                 7.631e-03  8.596e-04    8.877  < 2e-16 ***
## cloudcover_high                9.066e-04  5.577e-04    1.626 0.104041    
## shortwave_radiation           -3.146e-03  5.301e-04   -5.934 2.95e-09 ***
## direct_radiation               1.711e-03  4.701e-04    3.641 0.000272 ***
## direct_normal_irradiance       1.005e-03  1.957e-04    5.137 2.79e-07 ***
## windspeed_10m                 -3.700e-01  1.254e-02  -29.516  < 2e-16 ***
## windspeed_100m                 1.023e-01  7.050e-03   14.516  < 2e-16 ***
## winddirection_10m              2.514e-03  3.510e-04    7.162 7.95e-13 ***
## winddirection_100m             2.248e-03  3.554e-04    6.325 2.53e-10 ***
## windgusts_10m                  7.356e-02  3.730e-03   19.720  < 2e-16 ***
## et0_fao_evapotranspiration     3.975e+00  7.781e-01    5.108 3.25e-07 ***
## weathercode                    1.006e-03  1.177e-03    0.854 0.392997    
## vapor_pressure_deficit        -1.138e+00  1.251e-01   -9.095  < 2e-16 ***
## soil_temperature_0_to_7cm     -3.102e-02  1.628e-02   -1.906 0.056673 .  
## soil_temperature_7_to_28cm    -6.984e-01  1.990e-02  -35.092  < 2e-16 ***
## soil_temperature_28_to_100cm   9.765e-01  1.825e-02   53.513  < 2e-16 ***
## soil_temperature_100_to_255cm -1.945e+00  1.922e-02 -101.232  < 2e-16 ***
## soil_moisture_0_to_7cm        -2.006e+01  5.012e-01  -40.033  < 2e-16 ***
## soil_moisture_7_to_28cm       -3.880e+00  6.384e-01   -6.078 1.22e-09 ***
## soil_moisture_28_to_100cm     -1.590e+01  5.038e-01  -31.557  < 2e-16 ***
## soil_moisture_100_to_255cm    -4.186e+01  6.781e-01  -61.729  < 2e-16 ***
## year                           1.306e-01  4.248e-03   30.733  < 2e-16 ***
## doy                            4.123e-02  4.334e-04   95.130  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 418943  on 426145  degrees of freedom
## Residual deviance:  28148  on 426110  degrees of freedom
## AIC: 28220
## 
## Number of Fisher Scoring iterations: 13
# Predictions
# Boxplots of the fitted probability of NYC by actual city, faceted by
# test/train; outliers suppressed so box bodies stay readable
allCity %>% 
    mutate(pred=predict(tstGLM, newdata=., type="response")) %>% 
    ggplot(aes(x=src, y=pred)) + 
    geom_boxplot(fill="lightblue", outlier.shape=NA) + 
    facet_wrap(~tt) + 
    labs(x=NULL, 
         y="Predicted probability of NYC", 
         title="Boxplot for predicted probability of NYC", 
         subtitle="Outliers not plotted"
         )

# Accuracy on holdout using 0.5 as threshold: a prediction counts as correct
# when pred>0.5 for NYC rows and pred<0.5 for all other cities
allCity %>% 
    mutate(pred=predict(tstGLM, newdata=., type="response"), 
           hit=ifelse(src=="NYC", pred>0.5, pred<0.5)) %>% 
    group_by(tt, src) %>%
    summarize(acc=mean(hit), .groups="drop") %>%
    pivot_wider(id_cols=src, names_from=tt, values_from=acc)
## # A tibble: 6 × 3
##   src        test   train
##   <chr>     <dbl>   <dbl>
## 1 Chicago 0.998   0.998  
## 2 Houston 0.974   0.973  
## 3 LA      1       1      
## 4 Miami   0.00244 0.00253
## 5 NYC     0.965   0.964  
## 6 Vegas   1       1

The GLM performs well in differentiating NYC from Vegas, LA, Chicago, and Houston. The GLM (not trained on Miami data) almost always classifies Miami as being NYC.

The process is converted to functional form:

singleCityGLM <- function(dfTrain, 
                          dfTest=dfTrain, 
                          srcKey="NYC", 
                          plotPred=TRUE, 
                          printAcc=TRUE, 
                          returnGLM=TRUE, 
                          varsUse=varsTrain, 
                          dropVars="diffuse_radiation"
                          ) {
    
    # FUNCTION ARGUMENTS
    # dfTrain: training data (must contain src, tt, and the varsUse columns)
    # dfTest: test data
    # srcKey: cities that will be considered "positive" for GLM
    # plotPred: boolean, should boxplot of probability by actual city be plotted?
    # printAcc: boolean, should report of accuracy by test/train and actual city be reported?
    # returnGLM: boolean, should the GLM model be returned?
    # varsUse: character vector of predictor columns; defaults to the global
    #          varsTrain, matching the previous hard-coded behavior
    # dropVars: character vector of predictors to exclude before fitting; the
    #           default drops diffuse_radiation, which is perfectly correlated
    #           with the other radiation variables. any_of() tolerates names
    #           that are absent from varsUse.
    
    # Fit binomial GLM: 1 = observation is from a srcKey city, 0 otherwise
    tst <- dfTrain %>% 
        mutate(isKey=ifelse(src %in% srcKey, 1, 0)) %>%
        select(isKey, all_of(varsUse)) %>% 
        select(-any_of(dropVars)) %>%
        glm(isKey ~ ., data=., family="binomial")

    if(isTRUE(plotPred)) {
        # Boxplot of predicted probability by actual city, faceted by test/train
        p1 <- dfTest %>% 
            mutate(pred=predict(tst, newdata=., type="response")) %>% 
            ggplot(aes(x=src, y=pred)) + 
            geom_boxplot(fill="lightblue", outlier.shape=NA) + 
            facet_wrap(~tt) + 
            labs(x=NULL, 
                 y=paste0("Predicted probability of ", paste(srcKey, collapse=", ")), 
                 title=paste0("Boxplot for predicted probability of ", paste(srcKey, collapse=", ")), 
                 subtitle="Outliers not plotted"
                 )
        print(p1)
    }
    
    if(isTRUE(printAcc)) {
    
        # Accuracy on holdout using 0.5 as threshold
        dfTest %>% 
            mutate(pred=predict(tst, newdata=., type="response")) %>% 
            group_by(tt, src) %>%
            summarize(acc=mean(ifelse(src %in% srcKey, pred>0.5, pred<0.5)), .groups="drop") %>%
            pivot_wider(id_cols="src", names_from="tt", values_from="acc") %>%
            print()
        
    }
    
    # Visible return so top-level calls continue to auto-print the model
    if(isTRUE(returnGLM)) return(tst)
    
}

# Fit a combined LA+Vegas ("positive") classifier on the train split and
# score the full dataset, including Miami
singleCityGLM(dfTrain=allCity %>% filter(tt=="train"), dfTest=allCity, srcKey=c("LA", "Vegas"))
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## # A tibble: 6 × 3
##   src      test train
##   <chr>   <dbl> <dbl>
## 1 Chicago     1     1
## 2 Houston     1     1
## 3 LA          1     1
## 4 Miami       1     1
## 5 NYC         1     1
## 6 Vegas       1     1
## 
## Call:  glm(formula = isKey ~ ., family = "binomial", data = .)
## 
## Coefficients:
##                   (Intercept)                           hour  
##                     3.883e+02                      5.186e-03  
##                temperature_2m            relativehumidity_2m  
##                     9.358e-01                     -2.736e-02  
##                   dewpoint_2m           apparent_temperature  
##                     8.852e-02                     -6.703e-01  
##                  pressure_msl               surface_pressure  
##                     2.043e+00                     -2.113e+00  
##                 precipitation                           rain  
##                    -1.308e+01                      1.276e+01  
##                      snowfall                     cloudcover  
##                     2.189e+01                     -3.161e-04  
##                cloudcover_low                 cloudcover_mid  
##                     8.070e-03                     -7.422e-03  
##               cloudcover_high            shortwave_radiation  
##                    -3.322e-03                     -2.728e-03  
##              direct_radiation       direct_normal_irradiance  
##                     6.478e-03                     -2.116e-03  
##                 windspeed_10m                 windspeed_100m  
##                    -6.067e-01                      8.559e-02  
##             winddirection_10m             winddirection_100m  
##                     1.425e-03                     -2.378e-03  
##                 windgusts_10m     et0_fao_evapotranspiration  
##                     1.360e-01                      2.071e+00  
##                   weathercode         vapor_pressure_deficit  
##                    -2.007e-02                     -4.486e-01  
##     soil_temperature_0_to_7cm     soil_temperature_7_to_28cm  
##                    -1.376e-01                      3.282e-01  
##  soil_temperature_28_to_100cm  soil_temperature_100_to_255cm  
##                    -2.729e-01                      4.133e-01  
##        soil_moisture_0_to_7cm        soil_moisture_7_to_28cm  
##                     1.301e+01                     -6.936e+00  
##     soil_moisture_28_to_100cm     soil_moisture_100_to_255cm  
##                     1.524e+01                     -1.070e+02  
##                          year                            doy  
##                    -1.815e-01                     -1.346e-02  
## 
## Degrees of Freedom: 512046 Total (i.e. Null);  512011 Residual
## Null Deviance:       653500 
## Residual Deviance: 1.711e-05     AIC: 72

LA and Vegas are sufficiently different from the other cities in the full dataset that the GLM has perfect discriminating power

The function is run for each city, with results collected in a list:

# Fit one single-city GLM per source city; results are collected in a list
lstGLM <- allCity %>% 
    pull(src) %>% 
    unique() %>% 
    lapply(FUN=function(city) {
        singleCityGLM(dfTrain=allCity %>% filter(tt=="train"), 
                      dfTest=allCity, 
                      srcKey=city, 
                      printAcc=FALSE
                      )
    })
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# Name each list element by its city and inspect the top-level structure
names(lstGLM) <- unique(pull(allCity, src))
str(lstGLM, max.level=1)
## List of 6
##  $ NYC    :List of 30
##   ..- attr(*, "class")= chr [1:2] "glm" "lm"
##  $ LA     :List of 30
##   ..- attr(*, "class")= chr [1:2] "glm" "lm"
##  $ Chicago:List of 30
##   ..- attr(*, "class")= chr [1:2] "glm" "lm"
##  $ Houston:List of 30
##   ..- attr(*, "class")= chr [1:2] "glm" "lm"
##  $ Vegas  :List of 30
##   ..- attr(*, "class")= chr [1:2] "glm" "lm"
##  $ Miami  :List of 30
##   ..- attr(*, "class")= chr [1:2] "glm" "lm"

Performance of the single-city models is assessed, first using deviance:

# Null vs residual deviance for each single-city GLM, plotted on a log-10 scale.
# vapply() is used instead of sapply(): with FUN.VALUE=numeric(2) the return is
# guaranteed to be a 2-row numeric matrix, whereas sapply()'s simplification is
# input-dependent and can silently change type on unexpected input.
vapply(lstGLM, FUN=function(x) c(x$null.deviance, x$deviance), FUN.VALUE=numeric(2)) %>% 
    t() %>% 
    as.data.frame() %>% 
    purrr::set_names("nulldeviance", "deviance") %>% 
    rownames_to_column("city") %>% 
    pivot_longer(cols=-c(city)) %>% 
    # Cities ordered by their smallest deviance value for readability
    ggplot(aes(x=fct_reorder(city, value, min), y=value)) + 
    geom_point(aes(color=name)) + 
    coord_flip() + 
    scale_y_log10() + 
    labs(y="Deviance (log-10 scale)", x=NULL, title="singleCityGLM() deviance by single-city runs") + 
    scale_color_discrete(NULL) + 
    theme(legend.position="bottom")

The single-city models perform best on Vegas, Miami, and Houston, with essentially perfect discrimination. The model struggles the most with predictions of Chicago

Accuracy is further explored, using 0.5 as the threshold for predicting “yes”:

# Score every observation with every single-city GLM, stacking the results
# long: one row per (observation, model) with the predicted probability
glmPreds <- map_dfr(.x=names(lstGLM), 
                    .f=function(x) {
                        allCity %>% 
                            mutate(glm=x, 
                                   pred=predict(lstGLM[[x]], 
                                                newdata=allCity, 
                                                type="response"
                                                ), 
                                   rn=row_number()
                                   ) %>% 
                            select(src, glm, pred, rn)
                    })

# Accuracy per (actual city, model) pair, using 0.5 as the "yes" threshold:
# the target model should predict >= 0.5 for its own city, < 0.5 otherwise
glmPreds %>% 
    mutate(correct=if_else(src==glm, pred>=0.5, pred<0.5)) %>% 
    group_by(src, glm) %>% 
    summarize(acc=mean(correct), .groups="drop") %>% 
    pivot_wider(id_cols="glm", names_from="src", values_from="acc")
## # A tibble: 6 × 7
##   glm     Chicago Houston    LA Miami   NYC Vegas
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chicago   0.704   1.00  0.989 0.987 0.904 0.966
## 2 Houston   1       1     1     1     1     1    
## 3 LA        0.978   0.999 0.792 0.934 0.994 0.921
## 4 Miami     1       1     1     1     1     1    
## 5 NYC       0.996   0.957 1     0.988 0.925 1    
## 6 Vegas     1       1     1     1     1     1

Predictions are further assessed by looking at the highest predicted probability by actual city:

# For each observation, credit the model(s) with the highest predicted
# probability; ties share credit equally via weight 1/n()
glmPreds %>% 
    group_by(src, rn) %>% 
    filter(pred==max(pred)) %>%
    mutate(wt=1/n()) %>%
    ungroup() %>%
    count(src, glm, wt=wt, name="n") %>%
    group_by(src) %>%
    mutate(pct=n/sum(n)) %>%
    ungroup() %>%
    pivot_wider(id_cols="src", names_from="glm", values_from="pct")
## # A tibble: 6 × 7
##   src      Chicago       LA       NYC Houston Miami Vegas
##   <chr>      <dbl>    <dbl>     <dbl>   <dbl> <dbl> <dbl>
## 1 Chicago  0.962    0.0372   0.000367      NA    NA    NA
## 2 Houston NA       NA       NA              1    NA    NA
## 3 LA       0.0169   0.983   NA             NA    NA    NA
## 4 Miami   NA       NA       NA             NA     1    NA
## 5 NYC      0.00321  0.00103  0.996         NA    NA    NA
## 6 Vegas   NA       NA       NA             NA    NA     1

Predictions based on maximum probability by singleCityGLM() are highly accurate. Miami is predicted on a standalone basis, using only the non-Miami models:

# Standalone assessment of Miami: drop the Miami model's own predictions and
# see which of the remaining single-city GLMs claims each Miami observation
glmPreds %>% 
    filter(src=="Miami", glm!="Miami") %>%
    # Keep, per observation (rn), the model(s) with the highest probability
    group_by(src, rn) %>% 
    filter(pred==max(pred)) %>%
    # Ties share credit equally via weight 1/n()
    mutate(wt=1/n()) %>%
    group_by(src, glm) %>%
    summarize(n=sum(wt), .groups="drop") %>%
    # Convert weighted counts to within-city shares
    group_by(src) %>%
    mutate(pct=n/sum(n)) %>%
    ungroup() %>%
    pivot_wider(id_cols="src", names_from="glm", values_from="pct")
## # A tibble: 1 × 4
##   src   Chicago    LA    NYC
##   <chr>   <dbl> <dbl>  <dbl>
## 1 Miami   0.527 0.423 0.0505

In contrast to the rpart model, the GLMs trained without Miami predict Miami roughly 50/50 as Chicago or LA when looking only at maximum predicted probability

A basic rpart model is run to predict todSeason on all cities, with predictions made on holdout data:

# Classification tree for todSeason on all training predictors
tstTSRP <- rpart::rpart(todSeason ~ ., 
                        data=allCity %>% 
                            filter(tt=="train") %>% 
                            select(todSeason, all_of(varsTrain)), 
                        method="class")

# Variable importances: rpart stores them as a named numeric vector; build the
# two-column data.frame (predictor, varImp) directly from names and values
data.frame(predictor=names(tstTSRP$variable.importance), 
           varImp=as.numeric(tstTSRP$variable.importance))
##                        predictor    varImp
## 1                            doy 213442.55
## 2              diffuse_radiation 154136.55
## 3            shortwave_radiation 150774.82
## 4               direct_radiation 139385.18
## 5       direct_normal_irradiance 135892.30
## 6     et0_fao_evapotranspiration 113132.81
## 7                           hour 111401.37
## 8     soil_temperature_7_to_28cm  55988.21
## 9      soil_temperature_0_to_7cm  53509.74
## 10          apparent_temperature  41391.75
## 11  soil_temperature_28_to_100cm  38843.77
## 12                temperature_2m  38484.11
## 13 soil_temperature_100_to_255cm  30205.69
## 14                   dewpoint_2m  13407.34
# Predictions: confusion heatmap of actual vs predicted todSeason, by split
allCity %>% 
    mutate(pred=predict(tstTSRP, newdata=., type="class")) %>% 
    count(tt, todSeason, pred) %>% 
    ggplot(aes(x=todSeason, y=pred)) + 
    geom_tile(aes(fill=n)) + 
    geom_text(aes(label=n), size=2.5) + 
    scale_fill_continuous(low="white", high="green") + 
    facet_wrap(~tt)

# Accuracy on holdout: per-class hit rate, train vs test side by side
allCity %>% 
    mutate(pred=predict(tstTSRP, newdata=., type="class"), 
           hit=(todSeason==pred)) %>% 
    group_by(tt, todSeason) %>%
    summarize(acc=mean(hit), .groups="drop") %>%
    pivot_wider(id_cols="todSeason", names_from="tt", values_from="acc")
## # A tibble: 8 × 3
##   todSeason     test train
##   <fct>        <dbl> <dbl>
## 1 Spring-Day   0.898 0.899
## 2 Spring-Night 0.998 0.998
## 3 Summer-Day   0.909 0.912
## 4 Summer-Night 0.998 0.998
## 5 Fall-Day     0.930 0.928
## 6 Fall-Night   0.928 0.932
## 7 Winter-Day   0.840 0.840
## 8 Winter-Night 0.961 0.959

A basic tree model works well to predict todSeason, partly by using leaks of “doy” and “hour”. The model is updated to use only the leaks:

# Classification tree using ONLY the leak variables "doy" and "hour"
tstTSRP_v2 <- rpart::rpart(todSeason ~ ., 
                           data=allCity %>% 
                               filter(tt=="train") %>% 
                               select(todSeason, doy, hour), 
                           method="class")

# Variable importances
# (rpart's variable.importance is a named numeric vector; convert it into a
# two-column data.frame with columns "predictor" and "varImp" for printing)
tstTSRP_v2$variable.importance %>% 
    as.data.frame() %>% 
    purrr::set_names("varImp") %>% 
    rownames_to_column("predictor")
##   predictor   varImp
## 1      hour 254802.3
## 2       doy 190792.1
# Predictions, split by leap-year status: with only "doy" and "hour" as
# predictors, fixed doy cutpoints shift by one calendar day in leap years
allCity %>% 
    mutate(pred=predict(tstTSRP_v2, newdata=., type="class"), 
           # lubridate::leap_year() applies the full Gregorian rule (century
           # years are leap only when divisible by 400); a bare year %% 4 == 0
           # misclassifies years such as 1900 and 2100
           leapyear=ifelse(lubridate::leap_year(year), "leap", "not leap")
           ) %>% 
    count(tt, leapyear, todSeason, pred) %>% 
    ggplot(aes(x=todSeason, y=pred)) + 
    geom_tile(aes(fill=n)) + 
    scale_fill_continuous(low="white", high="green") + 
    facet_grid(leapyear~tt) + 
    geom_text(aes(label=n), size=2.5)

# Accuracy on holdout: per-class hit rate for the leaks-only tree
allCity %>% 
    mutate(pred=predict(tstTSRP_v2, newdata=., type="class"), 
           hit=(todSeason==pred)) %>% 
    group_by(tt, todSeason) %>%
    summarize(acc=mean(hit), .groups="drop") %>%
    pivot_wider(id_cols="todSeason", names_from="tt", values_from="acc")
## # A tibble: 8 × 3
##   todSeason     test train
##   <fct>        <dbl> <dbl>
## 1 Spring-Day   0.998 0.998
## 2 Spring-Night 0.998 0.998
## 3 Summer-Day   0.997 0.998
## 4 Summer-Night 0.998 0.998
## 5 Fall-Day     0.998 0.998
## 6 Fall-Night   0.998 0.998
## 7 Winter-Day   0.997 0.998
## 8 Winter-Night 0.998 0.997

Allowing for the two leaks, the model has perfect explanatory and predictive power outside of leap years, and is off by a day during leap years

The model is explored without the two leaks as predictors:

# Classification tree with the leak variables ("doy", "hour") removed;
# setdiff() preserves the varsTrain ordering of the remaining predictors
tstTSRP_v3 <- allCity %>% 
    filter(tt=="train") %>% 
    select(todSeason, all_of(setdiff(varsTrain, c("doy", "hour")))) %>% 
    rpart::rpart(todSeason ~ ., data=., method="class")

# Variable importances: build the (predictor, varImp) data.frame directly
# from the named numeric vector that rpart stores on the fitted object
data.frame(predictor=names(tstTSRP_v3$variable.importance), 
           varImp=as.numeric(tstTSRP_v3$variable.importance))
##                        predictor       varImp
## 1     soil_temperature_7_to_28cm 60256.427215
## 2   soil_temperature_28_to_100cm 55928.778080
## 3      soil_temperature_0_to_7cm 47282.553312
## 4              diffuse_radiation 43440.791685
## 5           apparent_temperature 43236.142507
## 6            shortwave_radiation 43176.705150
## 7                 temperature_2m 41109.978654
## 8       direct_normal_irradiance 40939.725786
## 9               direct_radiation 40939.725786
## 10 soil_temperature_100_to_255cm 33890.021542
## 11    et0_fao_evapotranspiration 30417.268394
## 12                   dewpoint_2m 29899.563671
## 13        vapor_pressure_deficit 17076.882075
## 14    soil_moisture_100_to_255cm 14683.504574
## 15     soil_moisture_28_to_100cm 13088.226784
## 16       soil_moisture_7_to_28cm 10973.939391
## 17        soil_moisture_0_to_7cm  8200.501825
## 18              surface_pressure  6099.586377
## 19                windspeed_100m  1232.395460
## 20                 windgusts_10m     2.293632
# Predictions
# Confusion heatmap: actual todSeason (x) vs predicted (y), faceted by split
allCity %>% 
    mutate(pred=predict(tstTSRP_v3, newdata=., type="class")) %>% 
    count(tt, todSeason, pred) %>% 
    ggplot(aes(x=todSeason, y=pred)) + 
    geom_tile(aes(fill=n)) + 
    scale_fill_continuous(low="white", high="green") + 
    facet_wrap(~tt) + 
    geom_text(aes(label=n), size=2.5)

# Accuracy on holdout: per-class hit rate for the no-leaks tree
allCity %>% 
    mutate(pred=predict(tstTSRP_v3, newdata=., type="class"), 
           hit=(todSeason==pred)) %>% 
    group_by(tt, todSeason) %>%
    summarize(acc=mean(hit), .groups="drop") %>%
    pivot_wider(id_cols="todSeason", names_from="tt", values_from="acc")
## # A tibble: 8 × 3
##   todSeason     test train
##   <fct>        <dbl> <dbl>
## 1 Spring-Day   0.345 0.345
## 2 Spring-Night 0.493 0.493
## 3 Summer-Day   0.782 0.787
## 4 Summer-Night 0.614 0.614
## 5 Fall-Day     0.518 0.515
## 6 Fall-Night   0.572 0.579
## 7 Winter-Day   0.539 0.533
## 8 Winter-Night 0.719 0.715

Accuracy is similar between test and train, so the model is learning generalized predictors. Overall accuracy hovers around 55%

The model is updated to include source, since deep soil temperature and time of year often have reproducible seasonal trends in a given locale:

# No-leaks tree with the source city (fct_src) added as a predictor; column
# order is kept identical to the varsTrain-based selection
tstTSRP_v4 <- allCity %>% 
    filter(tt=="train") %>% 
    select(todSeason, all_of(varsTrain), fct_src) %>% 
    select(-all_of(c("doy", "hour"))) %>%
    rpart::rpart(todSeason ~ ., data=., method="class")

# Variable importances
# (rpart's variable.importance is a named numeric vector; convert it into a
# two-column data.frame with columns "predictor" and "varImp" for printing)
tstTSRP_v4$variable.importance %>% 
    as.data.frame() %>% 
    purrr::set_names("varImp") %>% 
    rownames_to_column("predictor")
##                        predictor       varImp
## 1     soil_temperature_7_to_28cm 6.277409e+04
## 2   soil_temperature_28_to_100cm 5.994402e+04
## 3                        fct_src 5.138674e+04
## 4      soil_temperature_0_to_7cm 4.969776e+04
## 5  soil_temperature_100_to_255cm 4.937399e+04
## 6           apparent_temperature 4.377636e+04
## 7              diffuse_radiation 4.344079e+04
## 8                 temperature_2m 4.326572e+04
## 9            shortwave_radiation 4.316556e+04
## 10      direct_normal_irradiance 4.093973e+04
## 11              direct_radiation 4.093973e+04
## 12    et0_fao_evapotranspiration 3.041727e+04
## 13                   dewpoint_2m 2.862273e+04
## 14        vapor_pressure_deficit 1.566781e+04
## 15     soil_moisture_28_to_100cm 1.140520e+04
## 16              surface_pressure 8.989249e+03
## 17    soil_moisture_100_to_255cm 8.955969e+03
## 18       soil_moisture_7_to_28cm 4.650056e+03
## 19        soil_moisture_0_to_7cm 4.389984e+03
## 20                  pressure_msl 6.067136e+02
## 21                 windspeed_10m 2.236557e+02
## 22                windspeed_100m 1.809909e+02
## 23                      snowfall 5.403460e+01
## 24                   weathercode 5.403460e+01
## 25                 windgusts_10m 3.112607e-01
# Predictions: confusion heatmap for the source-aware tree, by split
allCity %>% 
    mutate(pred=predict(tstTSRP_v4, newdata=., type="class")) %>% 
    count(tt, todSeason, pred) %>% 
    ggplot(aes(x=todSeason, y=pred)) + 
    geom_tile(aes(fill=n)) + 
    geom_text(aes(label=n), size=2.5) + 
    scale_fill_continuous(low="white", high="green") + 
    facet_wrap(~tt)

# Accuracy on holdout
# Per-class hit rate for the source-aware tree (tstTSRP_v4), train vs test
allCity %>% 
    mutate(pred=predict(tstTSRP_v4, newdata=., type="class")) %>% 
    group_by(tt, todSeason) %>%
    summarize(acc=mean(todSeason==pred), .groups="drop") %>%
    pivot_wider(id_cols="todSeason", names_from="tt", values_from="acc")
## # A tibble: 8 × 3
##   todSeason     test train
##   <fct>        <dbl> <dbl>
## 1 Spring-Day   0.676 0.681
## 2 Spring-Night 0.539 0.541
## 3 Summer-Day   0.746 0.749
## 4 Summer-Night 0.610 0.609
## 5 Fall-Day     0.648 0.645
## 6 Fall-Night   0.708 0.714
## 7 Winter-Day   0.602 0.598
## 8 Winter-Night 0.734 0.730

Accuracy remains similar between test and train, so the model is learning generalized predictors. Overall accuracy increases to 65%

For comparison, the random forest is run using all variables, including the leaks:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest on ALL predictors (leaks "doy"/"hour" included): train on
# pre-2022 training rows, score the 2022 holdout rows
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
          yVar="todSeason", 
          xVars=c(varsTrain), 
          dfTest=allCity %>% filter(tt=="test", year==2022), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )
## Growing trees.. Progress: 23%. Estimated remaining time: 1 minute, 42 seconds.
## Growing trees.. Progress: 51%. Estimated remaining time: 1 minute, 0 seconds.
## Growing trees.. Progress: 76%. Estimated remaining time: 29 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.956%

In contrast to the basic rpart model with all variables, the random forest finds and exploits the leaks of ‘doy’ and ‘hour’ to drive accuracy nearly to 100%

The random forest is run using only the leaks of ‘hour’ and ‘doy’:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest using ONLY the leak variables "hour" and "doy"; mtry=2 because
# there are exactly two candidate predictors per split
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
          yVar="todSeason", 
          xVars=c("hour", "doy"), 
          dfTest=allCity %>% filter(tt=="test", year==2022), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          mtry=2,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 100%

Given that the holdout year is a non-leap-year, accuracy with the leaks is 100%

The random forest is then run excluding the leaks of ‘hour’ and ‘doy’:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest EXCLUDING the leak variables; setdiff() drops "hour" and "doy"
# from the predictor set while preserving the varsTrain ordering
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
          yVar="todSeason", 
          xVars=setdiff(varsTrain, c("hour", "doy")), 
          dfTest=allCity %>% filter(tt=="test", year==2022), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )
## Growing trees.. Progress: 20%. Estimated remaining time: 2 minutes, 7 seconds.
## Growing trees.. Progress: 39%. Estimated remaining time: 1 minute, 35 seconds.
## Growing trees.. Progress: 59%. Estimated remaining time: 1 minute, 4 seconds.
## Growing trees.. Progress: 77%. Estimated remaining time: 36 seconds.
## Growing trees.. Progress: 95%. Estimated remaining time: 8 seconds.

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.824%

In contrast to the basic rpart model (accuracy ~ 55%), the random forest drives accuracy to ~85% with predictors that exclude the leaks of ‘doy’ and ‘hour’

Predictor ‘source’ is added to the random forest excluding the leaks of ‘hour’ and ‘doy’:

keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Same no-leaks random forest with the source city (fct_src) added as a
# predictor, to test whether locale information improves holdout accuracy
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022), 
          yVar="todSeason", 
          xVars=c("fct_src", setdiff(varsTrain, c("hour", "doy"))), 
          dfTest=allCity %>% filter(tt=="test", year==2022), 
          useLabel=keyLabel, 
          useSub=stringr::str_to_sentence(keyLabel), 
          isContVar=FALSE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
          )
## Growing trees.. Progress: 19%. Estimated remaining time: 2 minutes, 15 seconds.
## Growing trees.. Progress: 36%. Estimated remaining time: 1 minute, 49 seconds.
## Growing trees.. Progress: 52%. Estimated remaining time: 1 minute, 25 seconds.
## Growing trees.. Progress: 69%. Estimated remaining time: 56 seconds.
## Growing trees.. Progress: 85%. Estimated remaining time: 27 seconds.

## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.995%

Unlike the basic rpart model, adding source does not improve random forest accuracy

Each combination of two variables is run through the random forest, using a smaller training dataset:

# Variables to explore: every training predictor except the target-derived
# todSeason columns, plus "src" and "month"
useTODS <- c(varsTrain[!grepl("todSeason", varsTrain)], "src", "month")
useTODS
##  [1] "hour"                          "temperature_2m"               
##  [3] "relativehumidity_2m"           "dewpoint_2m"                  
##  [5] "apparent_temperature"          "pressure_msl"                 
##  [7] "surface_pressure"              "precipitation"                
##  [9] "rain"                          "snowfall"                     
## [11] "cloudcover"                    "cloudcover_low"               
## [13] "cloudcover_mid"                "cloudcover_high"              
## [15] "shortwave_radiation"           "direct_radiation"             
## [17] "direct_normal_irradiance"      "diffuse_radiation"            
## [19] "windspeed_10m"                 "windspeed_100m"               
## [21] "winddirection_10m"             "winddirection_100m"           
## [23] "windgusts_10m"                 "et0_fao_evapotranspiration"   
## [25] "weathercode"                   "vapor_pressure_deficit"       
## [27] "soil_temperature_0_to_7cm"     "soil_temperature_7_to_28cm"   
## [29] "soil_temperature_28_to_100cm"  "soil_temperature_100_to_255cm"
## [31] "soil_moisture_0_to_7cm"        "soil_moisture_7_to_28cm"      
## [33] "soil_moisture_28_to_100cm"     "soil_moisture_100_to_255cm"   
## [35] "year"                          "doy"                          
## [37] "src"                           "month"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
set.seed(24103012)
# seq_len() is safe if the data ever has zero rows, unlike 1:nrow() which
# would yield c(1, 0); the sampled indices are identical for n >= 1
idxSmallTODS <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)
mtxSmallTODS <- matrix(nrow=0, ncol=3)

# Run each combination of two variables through a small random forest.
# Results (idx1, idx2, holdout accuracy) are written into a preallocated
# matrix instead of growing row-by-row with rbind(), which is O(n^2) in copies.
nTODS <- length(useTODS)
resTODS <- matrix(NA_real_, nrow=choose(nTODS, 2L), ncol=3L)
rowTODS <- 0L
for(idx1 in seq_len(nTODS - 1L)) {
    for(idx2 in seq(idx1 + 1L, nTODS)) {
        # weathercode is categorical; cast to factor for both train and test
        r2SmallTODS <- runFullRF(dfTrain=dfTrainCloud[idxSmallTODS,] %>% 
                                     mutate(weathercode=factor(weathercode)), 
                                 yVar="todSeason", 
                                 xVars=useTODS[c(idx1, idx2)], 
                                 dfTest=dfTestCloud %>% 
                                     mutate(weathercode=factor(weathercode)), 
                                 useLabel=keyLabel, 
                                 useSub=stringr::str_to_sentence(keyLabel), 
                                 isContVar=FALSE,
                                 mtry=2,
                                 makePlots=FALSE,
                                 returnData=TRUE
                                 )[["rfAcc"]]
        rowTODS <- rowTODS + 1L
        resTODS[rowTODS, ] <- c(idx1, idx2, r2SmallTODS)
    }
}
# Append to the (empty) accumulator so the final object matches the original
mtxSmallTODS <- rbind(mtxSmallTODS, resTODS)
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.57%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.575%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.636%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.438%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.612%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.596%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.409%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.737%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.623%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.588%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.575%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.298%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.889%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.189%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.316%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.454%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.098%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.567%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.948%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.735%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.651%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.428%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.74%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.098%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.998%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.217%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.558%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.394%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.633%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.017%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.41%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.938%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.422%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.846%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.81%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.48%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 100%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.463%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.889%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.716%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.566%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.156%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.865%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.193%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.056%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.408%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.086%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.271%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.454%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.736%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.068%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.972%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.807%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.888%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.277%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.515%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.477%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.838%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.889%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.302%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.676%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.863%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.454%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.466%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.509%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.351%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.894%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.185%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.581%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.062%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.417%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.049%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.526%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.272%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.105%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.25%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.417%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.719%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.879%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.001%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.107%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.831%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.359%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.279%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.291%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.661%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.115%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.75%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.165%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.845%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.538%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.959%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.186%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.392%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.387%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.463%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.729%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.306%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.267%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.406%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.795%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.917%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.898%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.046%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.771%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.204%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.833%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.181%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.12%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.169%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.378%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.398%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.596%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.108%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.169%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.931%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.857%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.034%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.469%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.709%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.978%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.418%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.4%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.032%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.169%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.375%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.702%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.846%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.314%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.874%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.181%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.625%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.399%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.799%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.637%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.507%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.593%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.07%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.181%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.941%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.921%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.095%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.054%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.933%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.81%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.993%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.711%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.14%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.185%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.597%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.69%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.671%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.264%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.99%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.298%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.833%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.434%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.498%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.444%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.453%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.521%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.873%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.785%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.587%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.685%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.104%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.103%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.173%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.671%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.97%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.036%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.494%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.846%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.519%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.779%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.089%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.077%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.138%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.214%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.597%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.178%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.582%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.698%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.819%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.997%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.862%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.736%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.597%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.17%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.98%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.767%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.564%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.899%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.069%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.04%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.239%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.932%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.875%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.831%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.972%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.772%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.904%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.957%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.51%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.004%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.894%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.095%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.574%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.391%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.574%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.81%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.978%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.894%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.132%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.703%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.279%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.235%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.972%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.151%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.442%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.991%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.397%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.402%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.354%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.216%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.812%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.224%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.519%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.511%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.361%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.77%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.6%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.303%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.3%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.637%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.788%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.703%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.979%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.603%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.611%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.957%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.807%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.553%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.076%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.996%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.511%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.269%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.491%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.718%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.718%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.647%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.182%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.82%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.067%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.193%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.349%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.848%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.907%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.824%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.363%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.664%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.85%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.702%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.137%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.888%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.369%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.979%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.392%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.695%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.254%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.868%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.576%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.167%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.965%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.876%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.117%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.483%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.771%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.954%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.845%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.251%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.668%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.166%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.155%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.189%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.406%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.488%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.243%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.804%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.581%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.033%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.137%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.182%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.192%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.347%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.086%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.651%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.125%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.738%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.85%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.342%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.722%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.328%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.873%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.659%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.411%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.931%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.134%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.594%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.231%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.61%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.109%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.587%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.795%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.549%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.715%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.056%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.626%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.523%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.482%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.203%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.872%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.164%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.568%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.874%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.553%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.873%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.315%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.41%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.451%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.684%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.098%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.152%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.725%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.609%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.22%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.805%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.607%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.216%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.747%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.982%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.201%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.38%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.121%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.226%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.378%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.959%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.847%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.165%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.994%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.348%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.228%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.82%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.074%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.108%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.461%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.457%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.961%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.992%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.923%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.845%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.365%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.102%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.493%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.538%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.836%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.787%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.741%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.525%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.692%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.677%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.264%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.733%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.969%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.319%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.202%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.868%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.007%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.332%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.862%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.024%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.965%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.03%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.807%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.091%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.83%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.616%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.254%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.128%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.254%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.722%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.789%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.61%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.837%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.127%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.54%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.82%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.639%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.982%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.013%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.156%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.393%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.474%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.735%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.996%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.206%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.994%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.923%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.649%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.259%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.718%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.736%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.094%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.013%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.772%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.081%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.812%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.817%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.083%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.817%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.355%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.581%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.466%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.664%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.661%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.533%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.583%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.215%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.02%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.611%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.034%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.356%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.047%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.476%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.123%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.543%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.854%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.627%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.711%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.21%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.47%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.014%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.217%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.342%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.607%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.978%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.785%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.766%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.552%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.408%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.279%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.969%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.998%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.495%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.93%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.219%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.988%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.081%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.047%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.532%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.089%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.803%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.908%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.59%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.678%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.537%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.658%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.425%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.734%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.179%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.533%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.453%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.938%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.45%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.452%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.939%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.785%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.511%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.759%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.371%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.822%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.175%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.176%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.187%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.228%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.038%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.783%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.878%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.272%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.728%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.225%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.155%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.95%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.897%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.425%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.643%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.27%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.719%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.73%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.847%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.376%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.597%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.415%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.053%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.418%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.735%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.877%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.981%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.765%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.602%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.759%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.805%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.568%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.484%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.949%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.639%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.828%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.414%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.365%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.983%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.292%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.145%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.725%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.683%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.599%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.13%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.317%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.675%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.71%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.636%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.815%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.842%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.477%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.868%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.09%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.439%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.961%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.922%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.437%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.495%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.609%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.477%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.005%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.445%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.015%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.574%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.634%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.192%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.335%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.2%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.881%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.668%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.728%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.327%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.465%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.492%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.896%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.812%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.505%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.186%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.193%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.746%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.127%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.779%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.152%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.904%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.078%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.927%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.172%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.327%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.617%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.424%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.642%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.723%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.843%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.698%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.371%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.992%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.784%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.23%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.369%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.112%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.72%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.566%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.026%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.148%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.204%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.447%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.779%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.185%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.201%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.212%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.035%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.178%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.327%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.394%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.14%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.128%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.773%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.723%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.795%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.484%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.552%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.936%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.779%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.815%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.134%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.754%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.355%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.756%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.973%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.024%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.821%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.416%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.917%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.37%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.875%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.624%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.69%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.35%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.144%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.226%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.468%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.193%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.136%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.861%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.129%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.044%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.618%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.743%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.08%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.915%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.832%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.142%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.042%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.565%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.036%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.168%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.492%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.164%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.663%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.556%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.726%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.686%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.167%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.115%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.222%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.183%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.614%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.304%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.904%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.348%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.478%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.796%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.38%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.765%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.547%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.621%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.134%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.378%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.36%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.605%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.628%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.616%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.545%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.47%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.613%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.787%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.328%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.89%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.869%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.009%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.402%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.055%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.202%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.34%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.756%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.589%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.88%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.318%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.178%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.075%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.992%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.583%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.588%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.907%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.183%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.158%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.227%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.779%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.019%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.959%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.136%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.81%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.768%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.919%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.341%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.722%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.507%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.179%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.844%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.27%
## 
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.316%